[gcc r16-1623] match: Simplify doubled not, negate and conjugate operators to a non-lvalue
https://gcc.gnu.org/g:7e4d55f3030e4840129001a2a9a4d656a6c18da5 commit r16-1623-g7e4d55f3030e4840129001a2a9a4d656a6c18da5 Author: Mikael Morin Date: Sat Jun 21 20:12:31 2025 +0200 match: Simplify doubled not, negate and conjugate operators to a non-lvalue gcc/ChangeLog: * match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a NON_LVALUE_EXPR wrapper to the simplification of doubled unary operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR. gcc/testsuite/ChangeLog: * gfortran.dg/non_lvalue_1.f90: New test. Diff: --- gcc/match.pd | 6 +++--- gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 0f53c162fce3..f4416d9172c4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* ~~x -> x */ (simplify (bit_not (bit_not @0)) - @0) + (non_lvalue @0)) /* zero_one_valued_p will match when a value is known to be either 0 or 1 including constants 0 or 1. @@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (negate (nop_convert? (negate @1))) (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1))) - (view_convert @1))) + (non_lvalue (view_convert @1)))) /* We can't reassociate floating-point unless -fassociative-math or fixed-point plus or minus because of saturation to +-Inf. */ @@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (conj (convert? (conj @0))) (if (tree_nop_conversion_p (TREE_TYPE (@0), type)) - (convert @0))) + (non_lvalue (convert @0)))) /* conj({x,y}) -> {x,-y} */ (simplify diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 new file mode 100644 index ..61dad5a2ce1b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 @@ -0,0 +1,32 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } +! +! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary +! 
operator expression would simplify to a bare data reference. + +! A NON_LVALUE_EXPR is generated for a double negation that would simplify to +! a bare data reference. +function f1 (f1_arg1) + integer, value :: f1_arg1 + integer :: f1 + f1 = -(-f1_arg1) +end function +! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR <f1_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complement that would simplify to +! a bare data reference. +function f2 (f2_arg1) + integer, value :: f2_arg1 + integer :: f2 + f2 = not(not(f2_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR <f2_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complex conjugate that would +! simplify to a bare data reference. +function f3 (f3_arg1) + complex, value :: f3_arg1 + complex :: f3 + f3 = conjg(conjg(f3_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR <f3_arg1>;" "original" } }
[gcc] Created branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'
The branch 'mikael/heads/non_lvalue_v05' was created in namespace 'refs/users' pointing to: 7e4d55f3030e... match: Simplify doubled not, negate and conjugate operators
[gcc(refs/users/mikael/heads/non_lvalue_v05)] match: Simplify doubled not, negate and conjugate operators to a non-lvalue
https://gcc.gnu.org/g:7e4d55f3030e4840129001a2a9a4d656a6c18da5 commit 7e4d55f3030e4840129001a2a9a4d656a6c18da5 Author: Mikael Morin Date: Sat Jun 21 20:12:31 2025 +0200 match: Simplify doubled not, negate and conjugate operators to a non-lvalue gcc/ChangeLog: * match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a NON_LVALUE_EXPR wrapper to the simplification of doubled unary operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR. gcc/testsuite/ChangeLog: * gfortran.dg/non_lvalue_1.f90: New test. Diff: --- gcc/match.pd | 6 +++--- gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 0f53c162fce3..f4416d9172c4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* ~~x -> x */ (simplify (bit_not (bit_not @0)) - @0) + (non_lvalue @0)) /* zero_one_valued_p will match when a value is known to be either 0 or 1 including constants 0 or 1. @@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (negate (nop_convert? (negate @1))) (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1))) - (view_convert @1))) + (non_lvalue (view_convert @1)))) /* We can't reassociate floating-point unless -fassociative-math or fixed-point plus or minus because of saturation to +-Inf. */ @@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (conj (convert? (conj @0))) (if (tree_nop_conversion_p (TREE_TYPE (@0), type)) - (convert @0))) + (non_lvalue (convert @0)))) /* conj({x,y}) -> {x,-y} */ (simplify diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 new file mode 100644 index ..61dad5a2ce1b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 @@ -0,0 +1,32 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } +! +! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary +! 
operator expression would simplify to a bare data reference. + +! A NON_LVALUE_EXPR is generated for a double negation that would simplify to +! a bare data reference. +function f1 (f1_arg1) + integer, value :: f1_arg1 + integer :: f1 + f1 = -(-f1_arg1) +end function +! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR <f1_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complement that would simplify to +! a bare data reference. +function f2 (f2_arg1) + integer, value :: f2_arg1 + integer :: f2 + f2 = not(not(f2_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR <f2_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complex conjugate that would +! simplify to a bare data reference. +function f3 (f3_arg1) + complex, value :: f3_arg1 + complex :: f3 + f3 = conjg(conjg(f3_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR <f3_arg1>;" "original" } }
[gcc] Deleted branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'
The branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users' was deleted. It previously pointed to: 85b4eb8956df... match: Simplify doubled not, negate and conjugate operators Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 85b4eb8... match: Simplify doubled not, negate and conjugate operators
[gcc r16-1621] vregs: Use force_subreg when instantiating subregs [PR120721]
https://gcc.gnu.org/g:8130a2ad91ca8571b099ba020443fadab7a688ca commit r16-1621-g8130a2ad91ca8571b099ba020443fadab7a688ca Author: Richard Sandiford Date: Mon Jun 23 08:46:27 2025 +0100 vregs: Use force_subreg when instantiating subregs [PR120721] In this PR, we started with: (subreg:V2DI (reg:DI virtual-reg) 0) and vregs instantiated the virtual register to the argument pointer. But: (subreg:V2DI (reg:DI ap) 0) is not a sensible subreg, since the argument pointer certainly can't be referenced in V2DImode. This is (IMO correctly) rejected after g:2dcc6dbd8a00caf7cfa8cac17b3fd1c33d658016. The vregs code that instantiates the subreg above is specific to rvalues and already creates new instructions for nonzero offsets. It is therefore safe to use force_subreg instead of simplify_gen_subreg. I did wonder whether we should instead say that a subreg of a virtual register is invalid if the same subreg would be invalid for the associated hard registers. But the point of virtual registers is that the offsets from the hard registers are not known until after expand has finished, and if an offset is nonzero, the virtual register will be instantiated into a pseudo that contains the sum of the hard register and the offset. The subreg would then be correct for that pseudo. The subreg is only invalid in this case because there is no offset. gcc/ PR rtl-optimization/120721 * function.cc (instantiate_virtual_regs_in_insn): Use force_subreg instead of simplify_gen_subreg when instantiating an rvalue SUBREG. gcc/testsuite/ PR rtl-optimization/120721 * g++.dg/torture/pr120721.C: New test. 
Diff: --- gcc/function.cc | 20 - gcc/testsuite/g++.dg/torture/pr120721.C | 39 + 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/gcc/function.cc b/gcc/function.cc index a3a74b44b916..48167b0c2072 100644 --- a/gcc/function.cc +++ b/gcc/function.cc @@ -1722,19 +1722,17 @@ instantiate_virtual_regs_in_insn (rtx_insn *insn) new_rtx = instantiate_new_reg (SUBREG_REG (x), &offset); if (new_rtx == NULL) continue; + start_sequence (); if (maybe_ne (offset, 0)) - { - start_sequence (); - new_rtx = expand_simple_binop - (GET_MODE (new_rtx), PLUS, new_rtx, -gen_int_mode (offset, GET_MODE (new_rtx)), -NULL_RTX, 1, OPTAB_LIB_WIDEN); - seq = end_sequence (); - emit_insn_before (seq, insn); - } - x = simplify_gen_subreg (recog_data.operand_mode[i], new_rtx, - GET_MODE (new_rtx), SUBREG_BYTE (x)); + new_rtx = expand_simple_binop + (GET_MODE (new_rtx), PLUS, new_rtx, + gen_int_mode (offset, GET_MODE (new_rtx)), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + x = force_subreg (recog_data.operand_mode[i], new_rtx, + GET_MODE (new_rtx), SUBREG_BYTE (x)); gcc_assert (x); + seq = end_sequence (); + emit_insn_before (seq, insn); break; default: diff --git a/gcc/testsuite/g++.dg/torture/pr120721.C b/gcc/testsuite/g++.dg/torture/pr120721.C new file mode 100644 index ..37dc46cb1187 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr120721.C @@ -0,0 +1,39 @@ +// { dg-additional-options "-w -fno-vect-cost-model" } + +template struct integral_constant { + static constexpr int value = __v; +}; +template using __bool_constant = integral_constant<__v>; +template using enable_if_t = int; +struct function_ref { + template + function_ref( + Callable, + enable_if_t<__bool_constant<__is_same(int, int)>::value> * = nullptr); +}; +struct ArrayRef { + int Data; + long Length; + int *begin(); + int *end(); +}; +struct StringRef { + char Data; + long Length; +}; +void attributeObject(function_ref); +struct ScopedPrinter { + virtual void printBinaryImpl(StringRef, StringRef, ArrayRef, bool, unsigned); +}; 
+struct JSONScopedPrinter : ScopedPrinter { + JSONScopedPrinter(); + void printBinaryImpl(StringRef, StringRef, ArrayRef Value, bool, + unsigned StartOffset) { +attributeObject([&] { + StartOffset; + for (char Val : Value) +; +}); + } +}; +JSONScopedPrinter::JSONScopedPrinter() {}
[gcc r16-1624] vect: Use combined peeling and versioning for mutually aligned DRs
https://gcc.gnu.org/g:6deab186535a5aa9f930e2db637089865d0bc4ff commit r16-1624-g6deab186535a5aa9f930e2db637089865d0bc4ff Author: Pengfei Li Date: Wed Jun 11 15:01:36 2025 + vect: Use combined peeling and versioning for mutually aligned DRs Current GCC uses either peeling or versioning, but not in combination, to handle unaligned data references (DRs) during vectorization. This limitation causes some loops with early break to fall back to scalar code at runtime. Consider the following loop with DRs in its early break condition: for (int i = start; i < end; i++) { if (a[i] == b[i]) break; count++; } In the loop, references to a[] and b[] need to be strictly aligned for vectorization because speculative reads that may cross page boundaries are not allowed. Current GCC does versioning for this loop by creating a runtime check like: ((&a[start] | &b[start]) & mask) == 0 to see if the two initial addresses both have their lower bits zero. If the above runtime check fails, the loop will fall back to scalar code. However, it's often possible that DRs are all unaligned at the beginning but they become all aligned after a few loop iterations. We call this situation DRs being "mutually aligned". This patch enables combined peeling and versioning to avoid loops with mutually aligned DRs falling back to scalar code. Specifically, the function vect_peeling_supportable is updated in this patch to return a three-state enum indicating how peeling can make all unsupportable DRs aligned. In addition to the previous true/false return values, a new state peeling_maybe_supported is used to indicate that peeling may be able to make these DRs aligned but we are not sure about it at compile time. In this case, peeling should be combined with versioning so that a runtime check will be generated to guard the peeled vectorized loop. A new type of runtime check is also introduced for combined peeling and versioning. It's enabled when LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT is true. 
The new check tests if all DRs recorded in LOOP_VINFO_MAY_MISALIGN_STMTS have the same lower address bits. For above loop case, the new test will generate an XOR between two addresses, like: ((&a[start] ^ &b[start]) & mask) == 0 Therefore, if a and b have the same alignment step (element size) and the same offset from an alignment boundary, a peeled vectorized loop will run. This new runtime check also works for >2 DRs, with the LHS expression being: ((a1 ^ a2) | (a2 ^ a3) | (a3 ^ a4) | ... | (an-1 ^ an)) & mask where ai is the address of i'th DR. This patch is bootstrapped and regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and aarch64-linux-gnu. gcc/ChangeLog: * tree-vect-data-refs.cc (vect_peeling_supportable): Return new enum values to indicate if combined peeling and versioning can potentially support vectorization. (vect_enhance_data_refs_alignment): Support combined peeling and versioning in vectorization analysis. * tree-vect-loop-manip.cc (vect_create_cond_for_align_checks): Add a new type of runtime check for mutually aligned DRs. * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Set default value of allow_mutual_alignment in the initializer list. * tree-vectorizer.h (enum peeling_support): Define type of peeling support for function vect_peeling_supportable. (LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT): New access macro. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-early-break_133_pfa6.c: Adjust test. 
Diff: --- .../gcc.dg/vect/vect-early-break_133_pfa6.c| 2 +- gcc/tree-vect-data-refs.cc | 168 - gcc/tree-vect-loop-manip.cc| 98 +--- gcc/tree-vect-loop.cc | 1 + gcc/tree-vectorizer.h | 16 ++ 5 files changed, 223 insertions(+), 62 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c index ee123df6ed2b..7787d037d9dc 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c @@ -20,4 +20,4 @@ unsigned test4(char x, char *vect_a, char *vect_b, int n) return ret; } -/* { dg-final { scan-tree-dump "Versioning for alignment will be applied" "vect" } } */ +/* { dg-final { scan-tree-dump "Both peeling and versioning will be applied" "vect" } } */ diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 036903a948f6..ee040eb98881 100644 --- a/gcc/tree-vec
[gcc r16-1622] tree-optimization/120729 - limit compile time in uninit_analysis::prune_phi_opnds
https://gcc.gnu.org/g:97044a47de533f2a9b3fc864e5ea318e53979079 commit r16-1622-g97044a47de533f2a9b3fc864e5ea318e53979079 Author: Richard Biener Date: Fri Jun 20 15:07:20 2025 +0200 tree-optimization/120729 - limit compile time in uninit_analysis::prune_phi_opnds The testcase in this PR shows, on the GCC 14 branch, that in some degenerate cases we can spend exponential time pruning always initialized paths through a web of PHIs. The following adds --param uninit-max-prune-work, defaulted to 10, to limit that to effectively O(1). PR tree-optimization/120729 * gimple-predicate-analysis.h (uninit_analysis::prune_phi_opnds): Add argument of work budget remaining. * gimple-predicate-analysis.cc (uninit_analysis::prune_phi_opnds): Likewise. Maintain and honor it throughout the recursion. * params.opt (uninit-max-prune-work): New. * doc/invoke.texi (uninit-max-prune-work): Document. Diff: --- gcc/doc/invoke.texi | 3 +++ gcc/gimple-predicate-analysis.cc | 12 +--- gcc/gimple-predicate-analysis.h | 2 +- gcc/params.opt | 4 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index dec3c7a1b805..91b0a201e1b6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17420,6 +17420,9 @@ predicate chain. @item uninit-max-num-chains Maximum number of predicates ored in the normalized predicate chain. +@item uninit-max-prune-work +Maximum amount of work done to prune paths where the variable is always initialized. + @item sched-autopref-queue-depth Hardware autoprefetcher scheduler model control flag. 
Number of lookahead cycles the model looks into; at ' diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc index 76f6ab613107..b056b42a17ec 100644 --- a/gcc/gimple-predicate-analysis.cc +++ b/gcc/gimple-predicate-analysis.cc @@ -385,7 +385,8 @@ bool uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, tree boundary_cst, tree_code cmp_code, hash_set *visited_phis, - bitmap *visited_flag_phis) + bitmap *visited_flag_phis, + unsigned &max_attempts) { /* The Boolean predicate guarding the PHI definition. Initialized lazily from PHI in the first call to is_use_guarded() and cached @@ -398,6 +399,10 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, if (!MASK_TEST_BIT (opnds, i)) continue; + if (max_attempts == 0) + return false; + --max_attempts; + tree flag_arg = gimple_phi_arg_def (flag_def, i); if (!is_gimple_constant (flag_arg)) { @@ -432,7 +437,7 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, unsigned opnds_arg_phi = m_eval.phi_arg_set (phi_arg_def); if (!prune_phi_opnds (phi_arg_def, opnds_arg_phi, flag_arg_def, boundary_cst, cmp_code, visited_phis, - visited_flag_phis)) + visited_flag_phis, max_attempts)) return false; bitmap_clear_bit (*visited_flag_phis, SSA_NAME_VERSION (phi_result)); @@ -634,9 +639,10 @@ uninit_analysis::overlap (gphi *phi, unsigned opnds, hash_set *visited, value that is in conflict with the use guard/predicate. 
*/ bitmap visited_flag_phis = NULL; gphi *phi_def = as_a (flag_def); + unsigned max_attempts = param_uninit_max_prune_work; bool all_pruned = prune_phi_opnds (phi, opnds, phi_def, boundary_cst, cmp_code, visited, -&visited_flag_phis); +&visited_flag_phis, max_attempts); if (visited_flag_phis) BITMAP_FREE (visited_flag_phis); if (all_pruned) diff --git a/gcc/gimple-predicate-analysis.h b/gcc/gimple-predicate-analysis.h index f71061ec2836..67a19aa09052 100644 --- a/gcc/gimple-predicate-analysis.h +++ b/gcc/gimple-predicate-analysis.h @@ -152,7 +152,7 @@ private: bool is_use_guarded (gimple *, basic_block, gphi *, unsigned, hash_set *); bool prune_phi_opnds (gphi *, unsigned, gphi *, tree, tree_code, - hash_set *, bitmap *); + hash_set *, bitmap *, unsigned &); bool overlap (gphi *, unsigned, hash_set *, const predicate &); void collect_phi_def_edges (gphi *, basic_block, vec *, diff --git a/gcc/params.opt b/gcc/params.opt index a67f900a63f7..31aa0bd57533 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1185,6 +1185,10 @@ predicate chain. Common Joined UInteger Var(param_uninit_max_num_chains) Init(8) IntegerRange(1,
[gcc] Created branch 'mikael/heads/non_lvalue_v05' in namespace 'refs/users'
The branch 'mikael/heads/non_lvalue_v05' was created in namespace 'refs/users' pointing to: 85b4eb8956df... match: Simplify doubled not, negate and conjugate operators
[gcc r15-9855] tailc: Allow musttail tail calls with -fsanitize=address [PR120608]
https://gcc.gnu.org/g:fa2e03effa5251a6f7c8b79a8e3be81c90fb5e4f commit r15-9855-gfa2e03effa5251a6f7c8b79a8e3be81c90fb5e4f Author: Jakub Jelinek Date: Mon Jun 23 16:08:34 2025 +0200 tailc: Allow musttail tail calls with -fsanitize=address [PR120608] These testcases show another problem with -fsanitize=address vs. musttail tail calls. In particular, there can be .ASAN_MARK (POISON, &a, 4); etc. calls after a tail call and those just prevent the tailc pass from marking the musttail calls as [tail call]. Normally, the sanopt pass (which comes after tailc) will optimize those away, the optimization is if there are no .ASAN_CHECK calls or normal function calls dominated by those .ASAN_MARK (POISON, ...) calls, the poison is not needed, because in the epilog sequence (the one dealt with in the patch posted earlier today) all the stack slots are unpoisoned anyway (or poisoned for use-after-return). Unlike __builtin_tsan_exit_function, .ASAN_MARK is not a real function and is always expanded inline, so can never be tail called successfully, so the patch just ignores those for the cfun->has_musttail && diag_musttail cases. If there is a non-musttail call, it will fail worst case during expansion because there is the epilog asan sequence. 2025-06-12 Jakub Jelinek PR middle-end/120608 * tree-tailcall.cc (empty_eh_cleanup): Ignore .ASAN_MARK (POISON) internal calls for the cfun->has_musttail case and diag_musttail. (find_tail_calls): Likewise. * c-c++-common/asan/pr120608-1.c: New test. * c-c++-common/asan/pr120608-2.c: New test. 
(cherry picked from commit 35a26f2ec55d20d524464c33b68b23328a7f6bbe) Diff: --- gcc/testsuite/c-c++-common/asan/pr120608-1.c | 43 gcc/testsuite/c-c++-common/asan/pr120608-2.c | 39 + gcc/tree-tailcall.cc | 16 +++ 3 files changed, 98 insertions(+) diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-1.c b/gcc/testsuite/c-c++-common/asan/pr120608-1.c new file mode 100644 index ..114c42db6f83 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr120608-1.c @@ -0,0 +1,43 @@ +/* PR middle-end/120608 */ +/* { dg-do run { target musttail } } */ +/* { dg-options "-O2 -fsanitize=address" } */ + +__attribute__((noipa)) void +foo (int *x, int *y, int *z) +{ + ++x[0]; + ++y[0]; + ++z[0]; +} + +__attribute__((noipa)) void +bar (int *x, int *y, int *z) +{ + if (x || y || z) +__builtin_abort (); +} + +__attribute__((noipa)) void +baz (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar (0, 0, 0); +} + +__attribute__((noipa)) void +qux (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar (0, 0, 0); +} + +int +main () +{ + baz (0, 0, 0); + qux (0, 0, 0); +} diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-2.c b/gcc/testsuite/c-c++-common/asan/pr120608-2.c new file mode 100644 index ..251ff3a1a074 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr120608-2.c @@ -0,0 +1,39 @@ +/* PR middle-end/120608 */ +/* { dg-do run { target musttail } } */ +/* { dg-options "-O2 -fsanitize=address" } */ +/* { dg-set-target-env-var ASAN_OPTIONS "detect_stack_use_after_return=1" } */ +/* { dg-shouldfail "asan" } */ + +__attribute__((noipa)) void +foo (int *x, int *y, int *z) +{ + ++x[0]; + ++y[0]; + ++z[0]; +} + +__attribute__((noipa)) void +bar (int *x, int *y, int *z) +{ + volatile int a = x[0] + y[0] + z[0]; +} + +__attribute__((noipa)) void +baz (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 
42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar (&a, &b, &c); /* { dg-warning "address of automatic variable 'a' passed to 'musttail' call argument" } */ +} /* { dg-warning "address of automatic variable 'b' passed to 'musttail' call argument" "" { target *-*-* } .-1 } */ + /* { dg-warning "address of automatic variable 'c' passed to 'musttail' call argument" "" { target *-*-* } .-2 } */ + +int +main () +{ + baz (0, 0, 0); +} + +// { dg-output "ERROR: AddressSanitizer: stack-use-after-return on address.*(\n|\r\n|\r)" } +// { dg-output "READ of size .*" } +// { dg-output ".*'a' \\(line 25\\) <== Memory access at offset \[0-9\]* is inside this variable.*" } diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index f593363dae43..8ce8bcf0e20b 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -528,6 +528,10 @@ empty_eh_cleanup (basic_block bb, int *eh_has_tsan_func_exit, int cnt) *eh_has_tsan_func_exit = 1; continue; } + if (eh_has_tsan_func_exit +
[gcc r15-9854] expand: Allow musttail tail calls with -fsanitize=address [PR120608]
https://gcc.gnu.org/g:e5cf6027581770e97790f6495a56515ea4d0f7c2 commit r15-9854-ge5cf6027581770e97790f6495a56515ea4d0f7c2 Author: Jakub Jelinek Date: Mon Jun 23 15:58:55 2025 +0200 expand: Allow musttail tail calls with -fsanitize=address [PR120608] The following testcase is rejected by GCC 15 but accepted (with s/gnu/clang/) by clang. The problem is that we want to execute a sequence of instructions to unpoison all automatic variables in the function and mark the var block allocated for use-after-return sanitization poisoned after the call, so we were just disabling tail calls if there are any instructions returned from asan_emit_stack_protection. It is fine and necessary for normal tail calls, but for musttail tail calls we actually document that accessing the automatic vars of the caller is UB as if they end their lifetime right before the tail call, so we also want address sanitizer use-after-return to diagnose that. The following patch will only disable normal tail calls when that sequence is present, for musttail it will arrange to emit a copy of that sequence before the tail call sequence. That sequence only tweaks the shadow memory and nothing in the code emitted by call expansion should touch the shadow memory, so it is ok to emit it already before argument setup. 2025-06-23 Jakub Jelinek PR middle-end/120608 * cfgexpand.cc: Include rtl-iter.h. (expand_gimple_tailcall): Add ASAN_EPILOG_SEQ argument, if non-NULL and expand_gimple_stmt emitted a tail call, emit a copy of that insn sequence before the call sequence. (expand_gimple_basic_block): Remove DISABLE_TAIL_CALLS argument, add ASAN_EPILOG_SEQ argument. Disable tail call flag only on non-musttail calls if that flag is set, pass it to expand_gimple_tailcall. (pass_expand::execute): Pass VAR_RET_SEQ directly as last expand_gimple_basic_block argument rather than its comparison with NULL. * g++.dg/asan/pr120608.C: New test. 
(cherry picked from commit b9523a935aaa28ffae9118e199a2f43a8a98e27e) Diff: --- gcc/cfgexpand.cc | 68 gcc/testsuite/g++.dg/asan/pr120608.C | 17 + 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index 2b27076658fd..981faf36e93e 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3. If not see #include "output.h" #include "builtins.h" #include "opts.h" +#include "rtl-iter.h" /* Some systems use __main in a way incompatible with its use in gcc, in these cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to @@ -4400,9 +4401,10 @@ expand_gimple_stmt (gimple *stmt) tailcall) and the normal result happens via a sqrt instruction. */ static basic_block -expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru) +expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru, + rtx_insn *asan_epilog_seq) { - rtx_insn *last2, *last; + rtx_insn *last2, *last, *first = get_last_insn (); edge e; edge_iterator ei; profile_probability probability; @@ -4419,6 +4421,58 @@ expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru) return NULL; found: + + if (asan_epilog_seq) +{ + /* We need to emit a copy of the asan_epilog_seq before +the insns emitted by expand_gimple_stmt above. The sequence +can contain labels, which need to be remapped. 
*/ + hash_map label_map; + start_sequence (); + emit_note (NOTE_INSN_DELETED); + for (rtx_insn *insn = asan_epilog_seq; insn; insn = NEXT_INSN (insn)) + switch (GET_CODE (insn)) + { + case INSN: + case CALL_INSN: + case JUMP_INSN: + emit_copy_of_insn_after (insn, get_last_insn ()); + break; + case CODE_LABEL: + label_map.put ((rtx) insn, (rtx) emit_label (gen_label_rtx ())); + break; + case BARRIER: + emit_barrier (); + break; + default: + gcc_unreachable (); + } + for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (JUMP_P (insn)) + { + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL) + { + rtx *loc = *iter; + if (LABEL_REF_P (*loc)) + { + rtx *lab = label_map.get ((rtx) label_ref_label (*loc)); + gcc_assert (lab); + set_label_ref_label (*loc, as_a (*lab)); + } + } + if (JUMP_LABEL (insn)) + { + rt
[gcc(refs/users/mikael/heads/non_lvalue_v05)] match: Simplify doubled not, negate and conjugate operators to a non_lvalue
https://gcc.gnu.org/g:85b4eb8956df894da3819cbbda4388ff6667fc23 commit 85b4eb8956df894da3819cbbda4388ff6667fc23 Author: Mikael Morin Date: Sat Jun 21 20:12:31 2025 +0200 match: Simplify doubled not, negate and conjugate operators to a non_lvalue gcc/ChangeLog: * match.pd (`-(-X)`, `~(~X)`, `conj(conj(X))`): Add a NON_LVALUE_EXPR wrapper to the simplification of doubled unary operators NEGATE_EXPR, BIT_NOT_EXPR and CONJ_EXPR. gcc/testsuite/ChangeLog: * gfortran.dg/non_lvalue_1.f90: New test. Diff: --- gcc/match.pd | 6 +++--- gcc/testsuite/gfortran.dg/non_lvalue_1.f90 | 32 ++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 0f53c162fce3..f4416d9172c4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2357,7 +2357,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* ~~x -> x */ (simplify (bit_not (bit_not @0)) - @0) + (non_lvalue @0)) /* zero_one_valued_p will match when a value is known to be either 0 or 1 including constants 0 or 1. @@ -4037,7 +4037,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (negate (nop_convert? (negate @1))) (if (!TYPE_OVERFLOW_SANITIZED (type) && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1))) - (view_convert @1))) + (non_lvalue (view_convert @1)))) /* We can't reassociate floating-point unless -fassociative-math or fixed-point plus or minus because of saturation to +-Inf. */ @@ -5767,7 +5767,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (conj (convert? (conj @0))) (if (tree_nop_conversion_p (TREE_TYPE (@0), type)) - (convert @0))) + (non_lvalue (convert @0)))) /* conj({x,y}) -> {x,-y} */ (simplify diff --git a/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 new file mode 100644 index ..61dad5a2ce1b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/non_lvalue_1.f90 @@ -0,0 +1,32 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } +! +! Check the generation of NON_LVALUE_EXPR expressions in cases where a unary +! 
operator expression would simplify to a bare data reference. + +! A NON_LVALUE_EXPR is generated for a double negation that would simplify to +! a bare data reference. +function f1 (f1_arg1) + integer, value :: f1_arg1 + integer :: f1 + f1 = -(-f1_arg1) +end function +! { dg-final { scan-tree-dump "__result_f1 = NON_LVALUE_EXPR <f1_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complement that would simplify to +! a bare data reference. +function f2 (f2_arg1) + integer, value :: f2_arg1 + integer :: f2 + f2 = not(not(f2_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f2 = NON_LVALUE_EXPR <f2_arg1>;" "original" } } + +! A NON_LVALUE_EXPR is generated for a double complex conjugate that would +! simplify to a bare data reference. +function f3 (f3_arg1) + complex, value :: f3_arg1 + complex :: f3 + f3 = conjg(conjg(f3_arg1)) +end function +! { dg-final { scan-tree-dump "__result_f3 = NON_LVALUE_EXPR <f3_arg1>;" "original" } }
[gcc r16-1625] expand: Allow musttail tail calls with -fsanitize=address [PR120608]
https://gcc.gnu.org/g:b9523a935aaa28ffae9118e199a2f43a8a98e27e commit r16-1625-gb9523a935aaa28ffae9118e199a2f43a8a98e27e Author: Jakub Jelinek Date: Mon Jun 23 15:58:55 2025 +0200 expand: Allow musttail tail calls with -fsanitize=address [PR120608] The following testcase is rejected by GCC 15 but accepted (with s/gnu/clang/) by clang. The problem is that we want to execute a sequence of instructions to unpoison all automatic variables in the function and mark the var block allocated for use-after-return sanitization poisoned after the call, so we were just disabling tail calls if there are any instructions returned from asan_emit_stack_protection. It is fine and necessary for normal tail calls, but for musttail tail calls we actually document that accessing the automatic vars of the caller is UB as if they end their lifetime right before the tail call, so we also want address sanitizer use-after-return to diagnose that. The following patch will only disable normal tail calls when that sequence is present, for musttail it will arrange to emit a copy of that sequence before the tail call sequence. That sequence only tweaks the shadow memory and nothing in the code emitted by call expansion should touch the shadow memory, so it is ok to emit it already before argument setup. 2025-06-23 Jakub Jelinek PR middle-end/120608 * cfgexpand.cc: Include rtl-iter.h. (expand_gimple_tailcall): Add ASAN_EPILOG_SEQ argument, if non-NULL and expand_gimple_stmt emitted a tail call, emit a copy of that insn sequence before the call sequence. (expand_gimple_basic_block): Remove DISABLE_TAIL_CALLS argument, add ASAN_EPILOG_SEQ argument. Disable tail call flag only on non-musttail calls if that flag is set, pass it to expand_gimple_tailcall. (pass_expand::execute): Pass VAR_RET_SEQ directly as last expand_gimple_basic_block argument rather than its comparison with NULL. * g++.dg/asan/pr120608.C: New test. 
Diff: --- gcc/cfgexpand.cc | 68 gcc/testsuite/g++.dg/asan/pr120608.C | 17 + 2 files changed, 79 insertions(+), 6 deletions(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index e1cdb718e127..33649d43f71c 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -75,6 +75,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "opts.h" #include "gimple-range.h" +#include "rtl-iter.h" /* Some systems use __main in a way incompatible with its use in gcc, in these cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to @@ -4458,9 +4459,10 @@ expand_gimple_stmt (gimple *stmt) tailcall) and the normal result happens via a sqrt instruction. */ static basic_block -expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru) +expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru, + rtx_insn *asan_epilog_seq) { - rtx_insn *last2, *last; + rtx_insn *last2, *last, *first = get_last_insn (); edge e; edge_iterator ei; profile_probability probability; @@ -4477,6 +4479,58 @@ expand_gimple_tailcall (basic_block bb, gcall *stmt, bool *can_fallthru) return NULL; found: + + if (asan_epilog_seq) +{ + /* We need to emit a copy of the asan_epilog_seq before +the insns emitted by expand_gimple_stmt above. The sequence +can contain labels, which need to be remapped. 
*/ + hash_map label_map; + start_sequence (); + emit_note (NOTE_INSN_DELETED); + for (rtx_insn *insn = asan_epilog_seq; insn; insn = NEXT_INSN (insn)) + switch (GET_CODE (insn)) + { + case INSN: + case CALL_INSN: + case JUMP_INSN: + emit_copy_of_insn_after (insn, get_last_insn ()); + break; + case CODE_LABEL: + label_map.put ((rtx) insn, (rtx) emit_label (gen_label_rtx ())); + break; + case BARRIER: + emit_barrier (); + break; + default: + gcc_unreachable (); + } + for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) + if (JUMP_P (insn)) + { + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL) + { + rtx *loc = *iter; + if (LABEL_REF_P (*loc)) + { + rtx *lab = label_map.get ((rtx) label_ref_label (*loc)); + gcc_assert (lab); + set_label_ref_label (*loc, as_a (*lab)); + } + } + if (JUMP_LABEL (insn)) + { + rtx *lab = label_map.get (JUMP_LABEL (insn)); + gcc_assert (
[gcc r16-1626] tailc: Allow musttail tail calls with -fsanitize=address [PR120608]
https://gcc.gnu.org/g:35a26f2ec55d20d524464c33b68b23328a7f6bbe commit r16-1626-g35a26f2ec55d20d524464c33b68b23328a7f6bbe Author: Jakub Jelinek Date: Mon Jun 23 16:08:34 2025 +0200 tailc: Allow musttail tail calls with -fsanitize=address [PR120608] These testcases show another problem with -fsanitize=address vs. musttail tail calls. In particular, there can be .ASAN_MARK (POISON, &a, 4); etc. calls after a tail call and those just prevent the tailc pass from marking the musttail calls as [tail call]. Normally, the sanopt pass (which comes after tailc) will optimize those away, the optimization is if there are no .ASAN_CHECK calls or normal function calls dominated by those .ASAN_MARK (POISON, ...) calls, the poison is not needed, because in the epilog sequence (the one dealt with in the patch posted earlier today) all the stack slots are unpoisoned anyway (or poisoned for use-after-return). Unlike __builtin_tsan_exit_function, .ASAN_MARK is not a real function and is always expanded inline, so it can never be tail called successfully, so the patch just ignores those for the cfun->has_musttail && diag_musttail cases. If there is a non-musttail call, it will fail worst case during expansion because there is the epilog asan sequence. 2025-06-12 Jakub Jelinek PR middle-end/120608 * tree-tailcall.cc (empty_eh_cleanup): Ignore .ASAN_MARK (POISON) internal calls for the cfun->has_musttail case and diag_musttail. (find_tail_calls): Likewise. * c-c++-common/asan/pr120608-1.c: New test. * c-c++-common/asan/pr120608-2.c: New test. 
Diff: --- gcc/testsuite/c-c++-common/asan/pr120608-1.c | 43 gcc/testsuite/c-c++-common/asan/pr120608-2.c | 39 + gcc/tree-tailcall.cc | 16 +++ 3 files changed, 98 insertions(+) diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-1.c b/gcc/testsuite/c-c++-common/asan/pr120608-1.c new file mode 100644 index ..114c42db6f83 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr120608-1.c @@ -0,0 +1,43 @@ +/* PR middle-end/120608 */ +/* { dg-do run { target musttail } } */ +/* { dg-options "-O2 -fsanitize=address" } */ + +__attribute__((noipa)) void +foo (int *x, int *y, int *z) +{ + ++x[0]; + ++y[0]; + ++z[0]; +} + +__attribute__((noipa)) void +bar (int *x, int *y, int *z) +{ + if (x || y || z) +__builtin_abort (); +} + +__attribute__((noipa)) void +baz (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar (0, 0, 0); +} + +__attribute__((noipa)) void +qux (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar (0, 0, 0); +} + +int +main () +{ + baz (0, 0, 0); + qux (0, 0, 0); +} diff --git a/gcc/testsuite/c-c++-common/asan/pr120608-2.c b/gcc/testsuite/c-c++-common/asan/pr120608-2.c new file mode 100644 index ..251ff3a1a074 --- /dev/null +++ b/gcc/testsuite/c-c++-common/asan/pr120608-2.c @@ -0,0 +1,39 @@ +/* PR middle-end/120608 */ +/* { dg-do run { target musttail } } */ +/* { dg-options "-O2 -fsanitize=address" } */ +/* { dg-set-target-env-var ASAN_OPTIONS "detect_stack_use_after_return=1" } */ +/* { dg-shouldfail "asan" } */ + +__attribute__((noipa)) void +foo (int *x, int *y, int *z) +{ + ++x[0]; + ++y[0]; + ++z[0]; +} + +__attribute__((noipa)) void +bar (int *x, int *y, int *z) +{ + volatile int a = x[0] + y[0] + z[0]; +} + +__attribute__((noipa)) void +baz (int *x, int *y, int *z) +{ + (void) x; (void) y; (void) z; + int a = 42, b = -42, c = 0; + foo (&a, &b, &c); + [[gnu::musttail]] return bar 
(&a, &b, &c); /* { dg-warning "address of automatic variable 'a' passed to 'musttail' call argument" } */ +} /* { dg-warning "address of automatic variable 'b' passed to 'musttail' call argument" "" { target *-*-* } .-1 } */ + /* { dg-warning "address of automatic variable 'c' passed to 'musttail' call argument" "" { target *-*-* } .-2 } */ + +int +main () +{ + baz (0, 0, 0); +} + +// { dg-output "ERROR: AddressSanitizer: stack-use-after-return on address.*(\n|\r\n|\r)" } +// { dg-output "READ of size .*" } +// { dg-output ".*'a' \\(line 25\\) <== Memory access at offset \[0-9\]* is inside this variable.*" } diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index 10e88d9c8292..d6d283022113 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -528,6 +528,10 @@ empty_eh_cleanup (basic_block bb, int *eh_has_tsan_func_exit, int cnt) *eh_has_tsan_func_exit = 1; continue; } + if (eh_has_tsan_func_exit + && sanitize_flags_p (SANITIZE_ADDRESS) + && asan_mark_p (g, ASA
[gcc r16-1627] RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on GR2VR cost
https://gcc.gnu.org/g:a2d018b642019165511e89d47bfb46af55f81f98 commit r16-1627-ga2d018b642019165511e89d47bfb46af55f81f98 Author: Pan Li Date: Sat Jun 21 09:00:16 2025 +0800 RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on GR2VR cost This patch would like to combine the vec_duplicate + vsaddu.vv to the vsaddu.vx. For example, see the code below. The related pattern will depend on the cost of vec_duplicate from GR2VR. Then the late-combine will take action if the cost of GR2VR is zero, and reject the combination if the GR2VR cost is greater than zero. Assume we have example code like below, GR2VR cost is 0. #define DEF_VX_BINARY(T, FUNC) \ void\ test_vx_binary (T * restrict out, T * restrict in, T x, unsigned n) \ { \ for (unsigned i = 0; i < n; i++) \ out[i] = FUNC (in[i], x); \ } T sat_add(T a, T b) { return (a + b) | (-(T)((T)(a + b) < a)); } DEF_VX_BINARY(uint32_t, sat_add) Before this patch: 10 │ test_vx_binary_or_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ vsetvli a5,zero,e32,m1,ta,ma 13 │ vmv.v.x v2,a2 14 │ slli a3,a3,32 15 │ srli a3,a3,32 16 │ .L3: 17 │ vsetvli a5,a3,e32,m1,ta,ma 18 │ vle32.v v1,0(a1) 19 │ slli a4,a5,2 20 │ sub a3,a3,a5 21 │ add a1,a1,a4 22 │ vsaddu.vv v1,v1,v2 23 │ vse32.v v1,0(a0) 24 │ add a0,a0,a4 25 │ bne a3,zero,.L3 After this patch: 10 │ test_vx_binary_or_int32_t_case_0: 11 │ beq a3,zero,.L8 12 │ slli a3,a3,32 13 │ srli a3,a3,32 14 │ .L3: 15 │ vsetvli a5,a3,e32,m1,ta,ma 16 │ vle32.v v1,0(a1) 17 │ slli a4,a5,2 18 │ sub a3,a3,a5 19 │ add a1,a1,a4 20 │ vsaddu.vx v1,v1,a2 21 │ vse32.v v1,0(a0) 22 │ add a0,a0,a4 23 │ bne a3,zero,.L3 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vx_binary_vec_dup_vec): Add new case US_PLUS. (expand_vx_binary_vec_vec_dup): Ditto. * config/riscv/riscv.cc (riscv_rtx_costs): Ditto. * config/riscv/vector-iterators.md: Add new op us_plus. 
Signed-off-by: Pan Li Diff: --- gcc/config/riscv/riscv-v.cc | 2 ++ gcc/config/riscv/riscv.cc| 1 + gcc/config/riscv/vector-iterators.md | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index ac690df3688a..45dd9256d020 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -5541,6 +5541,7 @@ expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, case UMAX: case SMIN: case UMIN: +case US_PLUS: icode = code_for_pred_scalar (code, mode); break; case MINUS: @@ -5579,6 +5580,7 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, case UMAX: case SMIN: case UMIN: +case US_PLUS: icode = code_for_pred_scalar (code, mode); break; default: diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 80498d6758ba..bbc7547d385f 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3995,6 +3995,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case UDIV: case MOD: case UMOD: + case US_PLUS: *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); break; default: diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 44ae79c48aa7..0e1318d1447c 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4042,11 +4042,11 @@ ]) (define_code_iterator any_int_binop_no_shift_v_vdup [ - plus minus and ior xor mult div udiv mod umod smax umax smin umin + plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus ]) (define_code_iterator any_int_binop_no_shift_vdup_v [ - plus minus and ior xor mult smax umax smin umin + plus minus and ior xor mult smax umax smin umin us_plus ]) (define_code_iterator any_int_unop [neg not])
[gcc r16-1628] RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 0 with GR2VR cost 0, 2 and 15
https://gcc.gnu.org/g:9a8f82d6a63e36ffba883b365101b58955ca7c64 commit r16-1628-g9a8f82d6a63e36ffba883b365101b58955ca7c64 Author: Pan Li Date: Sat Jun 21 09:10:07 2025 +0800 RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 0 with GR2VR cost 0, 2 and 15 Add asm dump check and run test for vec_duplicate + vsaddu.vv combine to vsaddu.vx, with the GR2VR cost is 0, 2 and 15. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c: Add asm check. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: Add test helper macros. * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add test data for run test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u16.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u32.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u64.c: New test. * gcc.target/riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u8.c: New test. 
Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u16.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u32.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u64.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-2-u8.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u16.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u32.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u64.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx-3-u8.c | 1 + .../gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h | 42 +++-- .../riscv/rvv/autovec/vx_vf/vx_binary_data.h | 196 + .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u16.c | 17 ++ .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u32.c | 17 ++ .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u64.c | 17 ++ .../riscv/rvv/autovec/vx_vf/vx_vsadd-run-1-u8.c| 17 ++ 18 files changed, 305 insertions(+), 13 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c index bcfd5145d24f..21a207edce75 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u16.c @@ -17,3 +17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ +/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c index b9a6a2830916..d1063adb0d6c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u32.c @@ -17,3 
+17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ +/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c index abb5e5e78428..3d96503fd9ad 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c @@ -17,3 +17,4 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vremu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vmaxu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ +/* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c index 50065d0973b2..339a35c3f422 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u
[gcc r16-1629] RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 1 with GR2VR cost 0, 1 and 2
https://gcc.gnu.org/g:11811e698b460b5fe45777f4c333aa74655cff39 commit r16-1629-g11811e698b460b5fe45777f4c333aa74655cff39 Author: Pan Li Date: Sat Jun 21 10:07:38 2025 +0800 RISC-V: Add test for vec_duplicate + vsaddu.vv combine case 1 with GR2VR cost 0, 1 and 2 Add asm dump check test for vec_duplicate + vsaddu.vv combine to vsaddu.vx, with the GR2VR cost is 0, 1 and 2. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c: Add asm check for vsaddu.vx combine. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c: Ditto. * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c: Ditto. 
Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c | 1 + gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c | 3 +++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c | 2 ++ gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c | 2 ++ 12 files changed, 24 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c index b62164347186..de10d66a1b23 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c @@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -28,3 +29,4 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vremu.vx} } } */ /* { dg-final { scan-assembler {vmaxu.vx} } } */ /* { dg-final { scan-assembler {vminu.vx} } } */ +/* { dg-final { scan-assembler {vsaddu.vx} } } */ diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c index 741a7495f136..2e59da06c979 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c @@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c index 70375b174734..064ed1f2e892 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c @@ -17,6 +17,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_0_WARP(T), max, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, MAX_FUNC_1_WARP(T), max, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_0_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -28,3 +29,4 @@ DEF_VX_BINARY_CASE_3_W
[gcc r16-1630] diagnostics: handle pp_token::kind::event_id in experimental-html sink [PR116792]
https://gcc.gnu.org/g:5a64c96cfe7a4d5783319c2fb8472bc75d702e9a commit r16-1630-g5a64c96cfe7a4d5783319c2fb8472bc75d702e9a Author: David Malcolm Date: Mon Jun 23 11:06:33 2025 -0400 diagnostics: handle pp_token::kind::event_id in experimental-html sink [PR116792] gcc/ChangeLog: PR other/116792 * diagnostic-format-html.cc (html_token_printer::print_tokens): Handle pp_token::kind::event_id. (selftest::test_token_printer): Add coverage of printing an event id. Signed-off-by: David Malcolm Diff: --- gcc/diagnostic-format-html.cc | 19 +++ 1 file changed, 19 insertions(+) diff --git a/gcc/diagnostic-format-html.cc b/gcc/diagnostic-format-html.cc index 45d088150dd6..5668b50a91ae 100644 --- a/gcc/diagnostic-format-html.cc +++ b/gcc/diagnostic-format-html.cc @@ -799,6 +799,16 @@ public: case pp_token::kind::end_url: m_xp.pop_tag ("a"); break; + + case pp_token::kind::event_id: + { + pp_token_event_id *sub = as_a (iter); + gcc_assert (sub->m_event_id.known_p ()); + m_xp.add_text ("("); + m_xp.add_text (std::to_string (sub->m_event_id.one_based ())); + m_xp.add_text (")"); + } + break; } } @@ -1375,6 +1385,15 @@ test_token_printer () "'" "\n"); } + + { +token_printer_test t; +diagnostic_event_id_t event_id (0); +pp_printf (&t.m_pp, "foo %@ bar", &event_id); +ASSERT_XML_PRINT_EQ + (t.m_top_element, + "foo (1) bar\n"); + } } /* A subclass of html_output_format for writing selftests.
[gcc] Deleted branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'
The branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users' was deleted. It previously pointed to: ed83521b3c74... fortran: Mention user variable in SELECT TYPE temporary var Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- ed83521... fortran: Mention user variable in SELECT TYPE temporary var
[gcc(refs/users/mikael/heads/select_type_name_v04)] fortran: Mention user variable in SELECT TYPE temporary variable names
https://gcc.gnu.org/g:ff3ca6ea6c1ee38fa419c3539febf1efba50b088 commit ff3ca6ea6c1ee38fa419c3539febf1efba50b088 Author: Mikael Morin Date: Fri Jun 20 12:08:02 2025 +0200 fortran: Mention user variable in SELECT TYPE temporary variable names The temporary variables that are generated to implement SELECT TYPE and TYPE IS statements have (before this change) a name depending only on the type. This can produce confusing dumps with code having multiple SELECT TYPE statements, as it isn't obvious which SELECT TYPE construct the variable relates to. This is especially the case with nested SELECT TYPE statements and with SELECT TYPE variables having identical types (and thus identical names). This change adds one additional user-provided discriminating string in the variable names, using the value from the SELECT TYPE variable name or last component reference name. The additional string may be truncated to fit in the temporary buffer. This requires all buffers to have matching sizes to get the same resulting name everywhere. gcc/fortran/ChangeLog: * misc.cc (gfc_var_name_for_select_type_temp): New function. * gfortran.h (gfc_var_name_for_select_type_temp): Declare it. * resolve.cc (resolve_select_type): Pick a discriminating name from the SELECT TYPE variable reference and use it in the name of the temporary variable that is generated. Truncate name to the buffer size. * match.cc (select_type_set_tmp): Likewise. Pass the discriminating name... (select_intrinsic_set_tmp): ... to this function. Use the discriminating name likewise. Augment the buffer size to match that of select_type_set_tmp and resolve_select_type. gcc/testsuite/ChangeLog: * gfortran.dg/select_type_51.f90: New test. 
Diff: --- gcc/fortran/gfortran.h | 2 ++ gcc/fortran/match.cc | 24 +++--- gcc/fortran/misc.cc | 21 gcc/fortran/resolve.cc | 21 ++-- gcc/testsuite/gfortran.dg/select_type_51.f90 | 37 5 files changed, 88 insertions(+), 17 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f73b5f9c23f4..6848bd1762d3 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3507,6 +3507,8 @@ void gfc_done_2 (void); int get_c_kind (const char *, CInteropKind_t *); +const char * gfc_var_name_for_select_type_temp (gfc_expr *); + const char *gfc_closest_fuzzy_match (const char *, char **); inline void vec_push (char **&optr, size_t &osz, const char *elt) diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc index a99a757bede6..aa0b04afd563 100644 --- a/gcc/fortran/match.cc +++ b/gcc/fortran/match.cc @@ -7171,9 +7171,11 @@ select_type_push (gfc_symbol *sel) /* Set the temporary for the current intrinsic SELECT TYPE selector. */ static gfc_symtree * -select_intrinsic_set_tmp (gfc_typespec *ts) +select_intrinsic_set_tmp (gfc_typespec *ts, const char *var_name) { - char name[GFC_MAX_SYMBOL_LEN]; + /* Keep size in sync with the buffer size in resolve_select_type as it + determines the final name through truncation. 
*/ + char name[GFC_MAX_SYMBOL_LEN + 12 + 1]; gfc_symtree *tmp; HOST_WIDE_INT charlen = 0; gfc_symbol *selector = select_type_stack->selector; @@ -7192,12 +7194,12 @@ select_intrinsic_set_tmp (gfc_typespec *ts) charlen = gfc_mpz_get_hwi (ts->u.cl->length->value.integer); if (ts->type != BT_CHARACTER) -sprintf (name, "__tmp_%s_%d", gfc_basic_typename (ts->type), -ts->kind); +snprintf (name, sizeof (name), "__tmp_%s_%d_%s", + gfc_basic_typename (ts->type), ts->kind, var_name); else snprintf (name, sizeof (name), - "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d", - gfc_basic_typename (ts->type), charlen, ts->kind); + "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d_%s", + gfc_basic_typename (ts->type), charlen, ts->kind, var_name); gfc_get_sym_tree (name, gfc_current_ns, &tmp, false); sym = tmp->n.sym; @@ -7239,7 +7241,9 @@ select_type_set_tmp (gfc_typespec *ts) return; } - tmp = select_intrinsic_set_tmp (ts); + gfc_expr *select_type_expr = gfc_state_stack->construct->expr1; + const char *var_name = gfc_var_name_for_select_type_temp (select_type_expr); + tmp = select_intrinsic_set_tmp (ts, var_name); if (tmp == NULL) { @@ -7247,9 +7251,11 @@ select_type_set_tmp (gfc_typespec *ts) return; if (ts->type == BT_CLASS) - sprintf (name, "__tmp_class_%s", ts->u.derived->name); + snprintf (name, sizeof (name), "__tmp_class_%s_%s", ts->u.derived->name, + var_name); else - sprintf (name, "__tmp_type_%s", ts->u.der
[gcc] Created branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'
The branch 'mikael/heads/select_type_name_v04' was created in namespace 'refs/users' pointing to: ff3ca6ea6c1e... fortran: Mention user variable in SELECT TYPE temporary var
[gcc(refs/users/mikael/heads/select_type_name_v04)] fortran: Mention user variable in SELECT TYPE temporary variable names
https://gcc.gnu.org/g:ed83521b3c747b7ddedeaa32b97801ca25d9633e commit ed83521b3c747b7ddedeaa32b97801ca25d9633e Author: Mikael Morin Date: Fri Jun 20 12:08:02 2025 +0200 fortran: Mention user variable in SELECT TYPE temporary variable names The temporary variables that are generated to implement SELECT TYPE and TYPE IS statements have (before this change) a name depending only on the type. This can produce confusing dumps with code having multiple SELECT TYPE statements, as it isn't obvious which SELECT TYPE construct the variable relates to. This is especially the case with nested SELECT TYPE statements and with SELECT TYPE variables having identical types (and thus identical names). This change adds one additional user-provided discriminating string in the variable names, using the value from the SELECT TYPE variable name or last component reference name. The additional string may be truncated to fit in the temporary buffer. This requires all buffers to have matching sizes to get the same resulting name everywhere. gcc/fortran/ChangeLog: * misc.cc (gfc_var_name_for_select_type_temp): New function. * gfortran.h (gfc_var_name_for_select_type_temp): Declare it. * resolve.cc (resolve_select_type): Pick a discriminating name from the SELECT TYPE variable reference and use it in the name of the temporary variable that is generated. Truncate name to the buffer size. * match.cc (select_type_set_tmp): Likewise. Pass the discriminating name... (select_intrinsic_set_tmp): ... to this function. Use the discriminating name likewise. Augment the buffer size to match that of select_type_set_tmp and resolve_select_type. gcc/testsuite/ChangeLog: * gfortran.dg/select_type_51.f90: New test. 
Diff: --- gcc/fortran/gfortran.h | 2 ++ gcc/fortran/match.cc | 22 ++--- gcc/fortran/misc.cc | 21 gcc/fortran/resolve.cc | 21 ++-- gcc/testsuite/gfortran.dg/select_type_51.f90 | 37 5 files changed, 86 insertions(+), 17 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f73b5f9c23f4..6848bd1762d3 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3507,6 +3507,8 @@ void gfc_done_2 (void); int get_c_kind (const char *, CInteropKind_t *); +const char * gfc_var_name_for_select_type_temp (gfc_expr *); + const char *gfc_closest_fuzzy_match (const char *, char **); inline void vec_push (char **&optr, size_t &osz, const char *elt) diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc index a99a757bede6..c3a6ded942d8 100644 --- a/gcc/fortran/match.cc +++ b/gcc/fortran/match.cc @@ -7171,9 +7171,9 @@ select_type_push (gfc_symbol *sel) /* Set the temporary for the current intrinsic SELECT TYPE selector. */ static gfc_symtree * -select_intrinsic_set_tmp (gfc_typespec *ts) +select_intrinsic_set_tmp (gfc_typespec *ts, const char *var_name) { - char name[GFC_MAX_SYMBOL_LEN]; + char name[GFC_MAX_SYMBOL_LEN + 12 + 1]; gfc_symtree *tmp; HOST_WIDE_INT charlen = 0; gfc_symbol *selector = select_type_stack->selector; @@ -7192,12 +7192,12 @@ select_intrinsic_set_tmp (gfc_typespec *ts) charlen = gfc_mpz_get_hwi (ts->u.cl->length->value.integer); if (ts->type != BT_CHARACTER) -sprintf (name, "__tmp_%s_%d", gfc_basic_typename (ts->type), -ts->kind); +snprintf (name, sizeof (name), "__tmp_%s_%d_%s", + gfc_basic_typename (ts->type), ts->kind, var_name); else snprintf (name, sizeof (name), - "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d", - gfc_basic_typename (ts->type), charlen, ts->kind); + "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d_%s", + gfc_basic_typename (ts->type), charlen, ts->kind, var_name); gfc_get_sym_tree (name, gfc_current_ns, &tmp, false); sym = tmp->n.sym; @@ -7239,7 +7239,9 @@ select_type_set_tmp (gfc_typespec *ts) return; } - tmp = 
select_intrinsic_set_tmp (ts); + gfc_expr *select_type_expr = gfc_state_stack->construct->expr1; + const char *var_name = gfc_var_name_for_select_type_temp (select_type_expr); + tmp = select_intrinsic_set_tmp (ts, var_name); if (tmp == NULL) { @@ -7247,9 +7249,11 @@ select_type_set_tmp (gfc_typespec *ts) return; if (ts->type == BT_CLASS) - sprintf (name, "__tmp_class_%s", ts->u.derived->name); + snprintf (name, sizeof (name), "__tmp_class_%s_%s", ts->u.derived->name, + var_name); else - sprintf (name, "__tmp_type_%s", ts->u.derived->name); + snprintf (name, sizeof (name), "__tmp_type_%s_%s", ts->u.derived->name, + var_name); g
[gcc] Created branch 'mikael/heads/select_type_name_v04' in namespace 'refs/users'
The branch 'mikael/heads/select_type_name_v04' was created in namespace 'refs/users' pointing to: ed83521b3c74... fortran: Mention user variable in SELECT TYPE temporary var
[gcc r16-1639] [RISC-V][PR target/118241] Fix data prefetch predicate/constraint for RISC-V
https://gcc.gnu.org/g:bf7162b321128ba93521a824e5a7a00d1cc3d1f8 commit r16-1639-gbf7162b321128ba93521a824e5a7a00d1cc3d1f8 Author: Jeff Law Date: Mon Jun 23 18:27:49 2025 -0600 [RISC-V][PR target/118241] Fix data prefetch predicate/constraint for RISC-V Fix typo in comment spotted by Peter B. PR target/118241 gcc/ * config/riscv/predicates.md: Fix comment typo in recent change. Diff: --- gcc/config/riscv/predicates.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 8072d67fbd97..061904b6e000 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -27,7 +27,7 @@ (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) -;; REG or REG+D where D fits in a simm12 and has the low 4 bits +;; REG or REG+D where D fits in a simm12 and has the low 5 bits ;; off. The REG+D form can be reloaded into a temporary if needed ;; after FP elimination if that exposes an invalid offset. (define_predicate "prefetch_operand"
[gcc r16-1637] Fixup dropping REG_EQUAL note in ext-dce
https://gcc.gnu.org/g:cdd678544fefc313cb1c9da0327158d3ed355f62 commit r16-1637-gcdd678544fefc313cb1c9da0327158d3ed355f62 Author: Sam James Date: Mon Jun 23 23:28:01 2025 +0100 Fixup dropping REG_EQUAL note in ext-dce Followup to r16-1613-g34e1e5e33ec3eb. remove_reg_equal_equiv_notes's 2nd argument is 'no_rescan' which we accidentally had on, tripping an assert in combine or ira because we hadn't left things in a consistent state. Fix the thinko by enabling rescanning. gcc/ChangeLog: PR rtl-optimization/120795 * ext-dce.cc (ext_dce_try_optimize_insn): Enable rescan in remove_reg_equal_equiv_notes call. Co-authored-by: Jeff Law Diff: --- gcc/ext-dce.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index b1d5ee4b36c4..df17b018bf1f 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -446,7 +446,7 @@ ext_dce_try_optimize_insn (rtx_insn *insn, rtx set) /* INSN may have a REG_EQUAL note indicating that the value was sign or zero extended. That note is no longer valid since we've just removed the extension. Just wipe the notes. */ - remove_reg_equal_equiv_notes (insn, true); + remove_reg_equal_equiv_notes (insn, false); } else {
[gcc r16-1640] Fix shrink wrap separate ICE for mingw [PR120741]
https://gcc.gnu.org/g:4b739c020a90dfe2569a292c44b2293a94d4bff5 commit r16-1640-g4b739c020a90dfe2569a292c44b2293a94d4bff5 Author: Lili Cui Date: Tue Jun 24 10:49:43 2025 +0800 Fix shrink wrap separate ICE for mingw [PR120741] gcc/ChangeLog: PR target/120741 * config/i386/i386.cc (ix86_expand_prologue): Remove 1 assertion. gcc/testsuite/ChangeLog: PR target/120741 * gcc.target/i386/pr120741.c: New test. * gcc.target/i386/shrink-wrap-separate-mingw.c: Likewise. Diff: --- gcc/config/i386/i386.cc| 2 -- gcc/testsuite/gcc.target/i386/pr120741.c | 22 ++ .../gcc.target/i386/shrink-wrap-separate-mingw.c | 22 ++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index fc3105919f45..84081ab12670 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -9443,8 +9443,6 @@ ix86_expand_prologue (void) } else { - gcc_assert (!crtl->shrink_wrapped_separate); - rtx eax = gen_rtx_REG (Pmode, AX_REG); rtx r10 = NULL; const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); diff --git a/gcc/testsuite/gcc.target/i386/pr120741.c b/gcc/testsuite/gcc.target/i386/pr120741.c new file mode 100644 index ..b59a58c48b89 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120741.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mstack-arg-probe" } */ + +short __mingw_swformat_format; +__builtin_va_list __mingw_swformat_arg; +int __mingw_swformat_fc; +typedef struct { + void *fp; + int bch[1024]; +} _IFP; +void __mingw_swformat(_IFP *s) { + if (s->fp) +while (__mingw_swformat_format) + if (__mingw_swformat_fc == 'A') + *__builtin_va_arg(__mingw_swformat_arg, double *) = 0; +} +void +__mingw_vswscanf (void) +{ + _IFP ifp; + __mingw_swformat(&ifp); +} diff --git a/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c new file mode 100644 index ..58635e49647a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/shrink-wrap-separate-mingw.c 
@@ -0,0 +1,22 @@ +/* { dg-do compile { target *-*-mingw* *-*-cygwin* } } */ +/* { dg-options "-std=gnu99 -O2" } */ + +short __mingw_swformat_format; +__builtin_va_list __mingw_swformat_arg; +int __mingw_swformat_fc; +typedef struct { + void *fp; + int bch[1024]; +} _IFP; +void __mingw_swformat(_IFP *s) { + if (s->fp) +while (__mingw_swformat_format) + if (__mingw_swformat_fc == 'A') +*__builtin_va_arg(__mingw_swformat_arg, double *) = 0; +} +void +__mingw_vswscanf (void) +{ + _IFP ifp; + __mingw_swformat(&ifp); +}
[gcc r16-1641] Fix AFDO zero profile handling
https://gcc.gnu.org/g:c24eb5e01da5ce07f6b616aff1129d4acbff69e6 commit r16-1641-gc24eb5e01da5ce07f6b616aff1129d4acbff69e6 Author: Jan Hubicka Date: Tue Jun 24 05:00:01 2025 +0200 Fix AFDO zero profile handling This patch fixes roms autofdo regression I introduced yesterday. What happens is that loop vectorization is disabled, because we get loop header count 0. I.e. loop_header: if (i < n) goto exit; loop_body: ... vectorizable computation ... The reason is that "if (i < n)" statement actually has 0 profile in AFDO feedback. This seems common and I believe it is an issue with debug info in loop vectorizer. Because loop is vectorized during train run, the conditional is replaced by vectorized loop conditional but the statement remains in the loop epilogue which is not executed at runtime. This is something we can fix and introduce debug statement in the vectorized loop body so user can breakpoint on it. I will try to produce testcase for that. However this patch fixes bug where I intended to only trust 0 counts from AFDO if they are also 0 in static profile and reversed the conditional. autoprofile-bootstrapped/regtested x86_64-linux, committed. * auto-profile.cc (afdo_set_bb_count): Dump also 0 count stmts. (afdo_annotate_cfg): Fix conditional for block having non-zero static profile. Diff: --- gcc/auto-profile.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 9b5be665f58a..8a1d9f878c65 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -1315,7 +1315,7 @@ afdo_set_bb_count (basic_block bb, hash_set &zero_bbs) { if (info.count > max_count) max_count = info.count; - if (dump_file && info.count) + if (dump_file) { fprintf (dump_file, " count %" PRIu64 " in stmt: ", (int64_t)info.count); @@ -2108,7 +2108,7 @@ afdo_annotate_cfg (void) afdo samples, but if even static profile agrees with 0, consider it final so propagation works better. 
*/ for (basic_block bb : zero_bbs) -if (bb->count.nonzero_p ()) +if (!bb->count.nonzero_p ()) { update_count_by_afdo_count (&bb->count, 0); set_bb_annotated (bb, &annotated_bb);
[gcc r16-1642] Copy discriminators when inlining
https://gcc.gnu.org/g:0235b6d41ace62064d5cd42553028136b49ad947 commit r16-1642-g0235b6d41ace62064d5cd42553028136b49ad947 Author: Jan Hubicka Date: Tue Jun 24 05:07:42 2025 +0200 Copy discriminators when inlining When inlining, discriminator info about the call statement is lost, which is not good for auto-profile and debug info quality. This patch fixes it. gcc/ChangeLog: * tree-inline.cc (expand_call_inline): Preserve discriminator. Diff: --- gcc/tree-inline.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index dee2dfc26206..7e0ac698e5e0 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -5018,6 +5018,9 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, loc = LOCATION_LOCUS (DECL_SOURCE_LOCATION (fn)); if (loc == UNKNOWN_LOCATION) loc = BUILTINS_LOCATION; + if (has_discriminator (gimple_location (stmt))) + loc = location_with_discriminator + (loc, get_discriminator_from_loc (gimple_location (stmt))); id->block = make_node (BLOCK); BLOCK_ABSTRACT_ORIGIN (id->block) = DECL_ORIGIN (fn); BLOCK_SOURCE_LOCATION (id->block) = loc;
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Add support for MVE Tail-Predicated Low Overhead Loops
https://gcc.gnu.org/g:02a4b4167f6d383383e08d8cbee718baf8a566ee commit 02a4b4167f6d383383e08d8cbee718baf8a566ee Author: Andre Vieira Date: Wed Jun 19 17:05:55 2024 +0100 arm: Add support for MVE Tail-Predicated Low Overhead Loops This patch adds support for MVE Tail-Predicated Low Overhead Loops by using the doloop functionality added to support predicated vectorized hardware loops. gcc/ChangeLog: * config/arm/arm-protos.h (arm_target_bb_ok_for_lob): Change declaration to pass basic_block. (arm_attempt_dlstp_transform): New declaration. * config/arm/arm.cc (TARGET_LOOP_UNROLL_ADJUST): Define target hook. (TARGET_PREDICT_DOLOOP_P): Likewise. (arm_target_bb_ok_for_lob): Adapt condition. (arm_mve_get_vctp_lanes): New function. (arm_dl_usage_type): New internal enum. (arm_get_required_vpr_reg): New function. (arm_get_required_vpr_reg_param): New function. (arm_get_required_vpr_reg_ret_val): New function. (arm_mve_get_loop_vctp): New function. (arm_mve_insn_predicated_by): New function. (arm_mve_across_lane_insn_p): New function. (arm_mve_load_store_insn_p): New function. (arm_mve_impl_pred_on_outputs_p): New function. (arm_mve_impl_pred_on_inputs_p): New function. (arm_last_vect_def_insn): New function. (arm_mve_impl_predicated_p): New function. (arm_mve_check_reg_origin_is_num_elems): New function. (arm_mve_dlstp_check_inc_counter): New function. (arm_mve_dlstp_check_dec_counter): New function. (arm_mve_loop_valid_for_dlstp): New function. (arm_predict_doloop_p): New function. (arm_loop_unroll_adjust): New function. (arm_emit_mve_unpredicated_insn_to_seq): New function. (arm_attempt_dlstp_transform): New function. * config/arm/arm.opt (mdlstp): New option. * config/arm/iterators.md (dlstp_elemsize, letp_num_lanes, letp_num_lanes_neg, letp_num_lanes_minus_1): New attributes. (DLSTP, LETP): New iterators. * config/arm/mve.md (predicated_doloop_end_internal, dlstp_insn): New insn patterns. * config/arm/thumb2.md (doloop_end): Adapt to support tail-predicated loops. 
(doloop_begin): Likewise. * config/arm/types.md (mve_misc): New mve type to represent predicated_loop_end insn sequences. * config/arm/unspecs.md: (DLSTP8, DLSTP16, DLSTP32, DSLTP64, LETP8, LETP16, LETP32, LETP64): New unspecs for DLSTP and LETP. gcc/testsuite/ChangeLog: * gcc.target/arm/lob.h: Add new helpers. * gcc.target/arm/lob1.c: Use new helpers. * gcc.target/arm/lob6.c: Likewise. * gcc.target/arm/mve/dlstp-compile-asm-1.c: New test. * gcc.target/arm/mve/dlstp-compile-asm-2.c: New test. * gcc.target/arm/mve/dlstp-compile-asm-3.c: New test. * gcc.target/arm/mve/dlstp-int8x16.c: New test. * gcc.target/arm/mve/dlstp-int8x16-run.c: New test. * gcc.target/arm/mve/dlstp-int16x8.c: New test. * gcc.target/arm/mve/dlstp-int16x8-run.c: New test. * gcc.target/arm/mve/dlstp-int32x4.c: New test. * gcc.target/arm/mve/dlstp-int32x4-run.c: New test. * gcc.target/arm/mve/dlstp-int64x2.c: New test. * gcc.target/arm/mve/dlstp-int64x2-run.c: New test. * gcc.target/arm/mve/dlstp-invalid-asm.c: New test. Co-authored-by: Stam Markianos-Wright (cherry picked from commit 3dfc28dbbd21b1d708aa40064380ef4c42c994d7) Diff: --- gcc/config/arm/arm-protos.h|4 +- gcc/config/arm/arm.cc | 1249 +++- gcc/config/arm/arm.opt |3 + gcc/config/arm/iterators.md| 15 + gcc/config/arm/mve.md | 50 + gcc/config/arm/thumb2.md | 138 ++- gcc/config/arm/types.md|6 +- gcc/config/arm/unspecs.md | 14 +- gcc/testsuite/gcc.target/arm/lob.h | 128 +- gcc/testsuite/gcc.target/arm/lob1.c| 23 +- gcc/testsuite/gcc.target/arm/lob6.c|8 +- .../gcc.target/arm/mve/dlstp-compile-asm-1.c | 146 +++ .../gcc.target/arm/mve/dlstp-compile-asm-2.c | 749 .../gcc.target/arm/mve/dlstp-compile-asm-3.c | 46 + .../gcc.target/arm/mve/dlstp-int16x8-run.c | 44 + gcc/testsuite/gcc.target/arm/mve/dlstp-int16x8.c | 31 + .../gcc.target/arm/mve/dlstp-int32x4-run.c | 45 + gcc/testsuite/gcc.target/arm/mve/dlstp-int32x4.c | 31 + .../gcc.
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Fix missed CE optimization for armv8.1-m.main [PR 116444]
https://gcc.gnu.org/g:55e31fad500907f24deb88670a7071830432a2b1 commit 55e31fad500907f24deb88670a7071830432a2b1 Author: Andre Vieira Date: Fri Oct 4 13:43:46 2024 +0100 arm: Fix missed CE optimization for armv8.1-m.main [PR 116444] This patch restores optimizations for armv8.1-m.main targets that were missed when the generation of csinc, csinv and csneg was enabled for those targets by the patch series containing: commit c2bb84be4a6e581bbf45891457ee632a07416982 Author: Sudi Das Date: Fri Sep 18 15:47:46 2020 +0100 [PATCH 2/5][Arm] New pattern for CSINV instructions The original patch series makes use of the "noce" machinery to transform RTL into patterns that later match the Armv8.1-M Mainline, by getting the target hook TARGET_HAVE_CONDITIONAL_EXECUTION to return FALSE for such targets prior to reload_completed. The same machinery however was transforming other RTL patterns which were later on causing the "ce" pass post reload_completed to no longer optimize conditional execution opportunities, which was causing the regression observed in PR target/116444, a regression of 'testsuite/gcc.target/arm/thumb-ifcvt-2.c' when run for an Armv8.1-M Mainline target. This patch implements the target hook TARGET_NOCE_CONVERSION_PROFITABLE_P to only allow "noce" to generate patterns that match CSINV, CSINC and CSNEG, thus ensuring that the early "ce" passes do not ruin things for later ones. gcc/ChangeLog: PR target/116444 * config/arm/arm-protos.h (arm_noce_conversion_profitable_p): New declaration. * config/arm/arm.cc (arm_is_v81m_cond_insn): New helper function used in ... (arm_noce_conversion_profitable_p): ... here. New function to implement ... (TARGET_NOCE_CONVERSION_PROFITABLE_P): ... this target hook. New define. 
(cherry picked from commit 7766a2c1eb683352ce117e8ed014665f392f) Diff: --- gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.cc | 87 + 2 files changed, 88 insertions(+) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index c650e4298a83..c25b193315b5 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -211,6 +211,7 @@ extern bool arm_pad_reg_upward (machine_mode, tree, int); #endif extern int arm_apply_result_size (void); extern opt_machine_mode arm_get_mask_mode (machine_mode mode); +extern bool arm_noce_conversion_profitable_p (rtx_insn *,struct noce_if_info *); #endif /* RTX_CODE */ diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index e5983242009f..cbbe67eb598a 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -835,6 +835,9 @@ static const scoped_attribute_specs *const arm_attribute_table[] = #undef TARGET_MODES_TIEABLE_P #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p +#undef TARGET_NOCE_CONVERSION_PROFITABLE_P +#define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p + #undef TARGET_CAN_CHANGE_MODE_CLASS #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class @@ -36173,6 +36176,90 @@ arm_get_mask_mode (machine_mode mode) return default_get_mask_mode (mode); } +/* Helper function to determine whether SEQ represents a sequence of + instructions representing the Armv8.1-M Mainline conditional arithmetic + instructions: csinc, csneg and csinv. The cinc instruction is generated + using a different mechanism. */ + +static bool +arm_is_v81m_cond_insn (rtx_insn *seq) +{ + rtx_insn *curr_insn = seq; + rtx set; + /* The pattern may start with a simple set with register operands. Skip + through any of those. 
*/ + while (curr_insn) +{ + set = single_set (curr_insn); + if (!set + || !REG_P (SET_DEST (set))) + return false; + + if (!REG_P (SET_SRC (set))) + break; + curr_insn = NEXT_INSN (curr_insn); +} + + if (!set) +return false; + + /* The next instruction should be one of: + NEG: for csneg, + PLUS: for csinc, + NOT: for csinv. */ + if (GET_CODE (SET_SRC (set)) != NEG + && GET_CODE (SET_SRC (set)) != PLUS + && GET_CODE (SET_SRC (set)) != NOT) +return false; + + curr_insn = NEXT_INSN (curr_insn); + if (!curr_insn) +return false; + + /* The next instruction should be a COMPARE. */ + set = single_set (curr_insn); + if (!set + || !REG_P (SET_DEST (set)) + || GET_CODE (SET_SRC (set)) != COMPARE) +return false; + + curr_insn = NEXT_INSN (curr_insn); + if (!curr_insn) +return false; + + /* And the last instruction should be an IF_THEN_ELSE. */ + set = single_set (curr_insn); + if (!set + || !REG_P (SET_DEST (set)) + || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) +return false; + + return !NEXT_INSN (curr_insn); +} + +/* For Armv8.1-M Mainline we have b
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444]
https://gcc.gnu.org/g:741aded788931ec98d29feafd5c382ebbbcaa867 commit 741aded788931ec98d29feafd5c382ebbbcaa867 Author: Andre Simoes Dias Vieira Date: Fri Nov 8 13:34:57 2024 + arm: Improvements to arm_noce_conversion_profitable_p call [PR 116444] When not dealing with the special armv8.1-m.main conditional instructions case make sure it uses the default_noce_conversion_profitable_p call to determine whether the sequence is cost effective. Also make sure arm_noce_conversion_profitable_p accepts vsel patterns for Armv8.1-M Mainline targets. gcc/ChangeLog: PR target/116444 * config/arm/arm.cc (arm_noce_conversion_profitable_p): Call default_noce_conversion_profitable_p when not dealing with the armv8.1-m.main special case. (arm_is_vsel_fp_insn): New function. (cherry picked from commit 1e8396464cb990d554c932cd959742b86660a25a) Diff: --- gcc/config/arm/arm.cc | 59 --- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index d85dc7b8cf31..1a43784daee2 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -36176,10 +36176,58 @@ arm_get_mask_mode (machine_mode mode) return default_get_mask_mode (mode); } +/* Helper function to determine whether SEQ represents a sequence of + instructions representing the vsel floating point instructions. + This is an heuristic to check whether the proposed optimisation is desired, + the choice has no consequence for correctness. */ +static bool +arm_is_vsel_fp_insn (rtx_insn *seq) +{ + rtx_insn *curr_insn = seq; + rtx set = NULL_RTX; + /* The pattern may start with a simple set with register operands. Skip + through any of those. */ + while (curr_insn) +{ + set = single_set (curr_insn); + if (!set + || !REG_P (SET_DEST (set))) + return false; + + if (!REG_P (SET_SRC (set))) + break; + curr_insn = NEXT_INSN (curr_insn); +} + + if (!set) +return false; + + /* The next instruction should be a compare. 
*/ + if (!REG_P (SET_DEST (set)) + || GET_CODE (SET_SRC (set)) != COMPARE) +return false; + + curr_insn = NEXT_INSN (curr_insn); + if (!curr_insn) +return false; + + /* And the last instruction should be an IF_THEN_ELSE. */ + set = single_set (curr_insn); + if (!set + || !REG_P (SET_DEST (set)) + || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) +return false; + + return !NEXT_INSN (curr_insn); +} + + /* Helper function to determine whether SEQ represents a sequence of instructions representing the Armv8.1-M Mainline conditional arithmetic instructions: csinc, csneg and csinv. The cinc instruction is generated - using a different mechanism. */ + using a different mechanism. + This is an heuristic to check whether the proposed optimisation is desired, + the choice has no consequence for correctness. */ static bool arm_is_v81m_cond_insn (rtx_insn *seq) @@ -36248,15 +36296,20 @@ arm_is_v81m_cond_insn (rtx_insn *seq) hook to only allow "noce" to generate the patterns that are profitable. */ bool -arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *) +arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) { if (!TARGET_COND_ARITH || reload_completed) -return true; +return default_noce_conversion_profitable_p (seq, if_info); if (arm_is_v81m_cond_insn (seq)) return true; + /* Look for vsel opportunities as we still want to codegen these for + Armv8.1-M Mainline targets. */ + if (arm_is_vsel_fp_insn (seq)) +return true; + return false; }
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: fix bootstrap issue with arm_noce_conversion_profitable_p patch [NFC]
https://gcc.gnu.org/g:613edf1adfb77842a277fc556892e0938e6af39f commit 613edf1adfb77842a277fc556892e0938e6af39f Author: Andre Vieira Date: Mon Oct 7 14:16:38 2024 +0100 arm: fix bootstrap issue with arm_noce_conversion_profitable_p patch [NFC] This obvious patch fixes two warnings introduced with the implementation of the arm_noce_conversion_profitable_p hook. gcc/ChangeLog: * config/arm/arm.cc (arm_noce_conversion_profitable_p): Remove unused argument name. (arm_is_v81m_cond_insn): Initialize variable. (cherry picked from commit 5fb1ab539e3315175d2e843f4ce40bde6dd7c520) Diff: --- gcc/config/arm/arm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index cbbe67eb598a..d85dc7b8cf31 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -36185,7 +36185,7 @@ static bool arm_is_v81m_cond_insn (rtx_insn *seq) { rtx_insn *curr_insn = seq; - rtx set; + rtx set = NULL_RTX; /* The pattern may start with a simple set with register operands. Skip through any of those. */ while (curr_insn) @@ -36248,7 +36248,7 @@ arm_is_v81m_cond_insn (rtx_insn *seq) hook to only allow "noce" to generate the patterns that are profitable. */ bool -arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) +arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *) { if (!TARGET_COND_ARITH || reload_completed)
[gcc(refs/vendors/ARM/heads/arm-14.3)] doloop: Add support for predicated vectorized loops
https://gcc.gnu.org/g:01494aa8d6e79ca19d26a59fb55035a2649d9238 commit 01494aa8d6e79ca19d26a59fb55035a2649d9238 Author: Andre Vieira Date: Wed Jun 19 17:05:45 2024 +0100 doloop: Add support for predicated vectorized loops This patch adds support in the target agnostic doloop pass for the detection of predicated vectorized hardware loops. Arm is currently the only target that will make use of this feature. gcc/ChangeLog: * df-core.cc (df_bb_regno_only_def_find): New helper function. * df.h (df_bb_regno_only_def_find): Declare new function. * loop-doloop.cc (doloop_condition_get): Add support for detecting predicated vectorized hardware loops. (doloop_modify): Add support for GTU condition checks. (doloop_optimize): Update costing computation to support alterations to desc->niter_expr by the backend. Co-authored-by: Stam Markianos-Wright (cherry picked from commit 5d0c1b4e0d33c2d1077264636d0a65ce206d0d96) Diff: --- gcc/df-core.cc | 15 + gcc/df.h | 1 + gcc/loop-doloop.cc | 164 +++-- 3 files changed, 113 insertions(+), 67 deletions(-) diff --git a/gcc/df-core.cc b/gcc/df-core.cc index f0eb4c93957f..b0e8a88d433b 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -1964,6 +1964,21 @@ df_bb_regno_last_def_find (basic_block bb, unsigned int regno) return NULL; } +/* Return the one and only def of REGNO within BB. If there is no def or + there are multiple defs, return NULL. */ + +df_ref +df_bb_regno_only_def_find (basic_block bb, unsigned int regno) +{ + df_ref temp = df_bb_regno_first_def_find (bb, regno); + if (!temp) +return NULL; + else if (temp == df_bb_regno_last_def_find (bb, regno)) +return temp; + else +return NULL; +} + /* Finds the reference corresponding to the definition of REG in INSN. DF is the dataflow object. 
*/ diff --git a/gcc/df.h b/gcc/df.h index 84e5aa8b524d..c4e690b40cf2 100644 --- a/gcc/df.h +++ b/gcc/df.h @@ -987,6 +987,7 @@ extern void df_check_cfg_clean (void); #endif extern df_ref df_bb_regno_first_def_find (basic_block, unsigned int); extern df_ref df_bb_regno_last_def_find (basic_block, unsigned int); +extern df_ref df_bb_regno_only_def_find (basic_block, unsigned int); extern df_ref df_find_def (rtx_insn *, rtx); extern bool df_reg_defined (rtx_insn *, rtx); extern df_ref df_find_use (rtx_insn *, rtx); diff --git a/gcc/loop-doloop.cc b/gcc/loop-doloop.cc index 0d101d64bbfc..7528a9225557 100644 --- a/gcc/loop-doloop.cc +++ b/gcc/loop-doloop.cc @@ -86,10 +86,10 @@ doloop_condition_get (rtx_insn *doloop_pat) forms: 1) (parallel [(set (pc) (if_then_else (condition) - (label_ref (label)) - (pc))) -(set (reg) (plus (reg) (const_int -1))) -(additional clobbers and uses)]) + (label_ref (label)) + (pc))) +(set (reg) (plus (reg) (const_int -1))) +(additional clobbers and uses)]) The branch must be the first entry of the parallel (also required by jump.cc), and the second entry of the parallel must be a set of @@ -97,19 +97,33 @@ doloop_condition_get (rtx_insn *doloop_pat) the loop counter in an if_then_else too. 2) (set (reg) (plus (reg) (const_int -1)) - (set (pc) (if_then_else (reg != 0) -(label_ref (label)) -(pc))). +(set (pc) (if_then_else (reg != 0) +(label_ref (label)) +(pc))). 
- Some targets (ARM) do the comparison before the branch, as in the + 3) Some targets (Arm) do the comparison before the branch, as in the following form: - 3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0))) - (set (reg) (plus (reg) (const_int -1)))]) -(set (pc) (if_then_else (cc == NE) -(label_ref (label)) -(pc))) */ - + (parallel [(set (cc) (compare (plus (reg) (const_int -1)) 0)) + (set (reg) (plus (reg) (const_int -1)))]) + (set (pc) (if_then_else (cc == NE) +(label_ref (label)) +(pc))) + + 4) This form supports a construct that is used to represent a vectorized + do loop with predication, however we do not need to care about the + details of the predication here. + Arm uses this construct to support MVE tail predication. + + (parallel + [(set (pc) +(if_then_else (gtu (plus (reg) (const_int -n)) + (const_int n-1)) + (label_ref) +
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Fix arm_mve_dlstp_check_dec_counter's use of single_pred
https://gcc.gnu.org/g:e9a0a09b15bfaa706c6f5b0b1d29d81942096af7 commit e9a0a09b15bfaa706c6f5b0b1d29d81942096af7 Author: Andre Vieira Date: Wed Nov 20 09:23:50 2024 + arm, mve: Fix arm_mve_dlstp_check_dec_counter's use of single_pred Call 'single_pred_p' before 'single_pred' to verify it is safe to do so. gcc/ChangeLog: * config/arm/arm.cc (arm_mve_dlstp_check_dec_counter): Call single_pred_p to verify it's safe to call single_pred. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/dlstp-loop-form.c: Add loop that triggered ICE. (cherry picked from commit 3ec0b7cd7ce7b64b58cb0ee52fb95fb0875c805c) Diff: --- gcc/config/arm/arm.cc | 5 +++-- gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c | 12 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 9185ce2c12ea..e5983242009f 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -35443,9 +35443,10 @@ arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* vctp_insn, return NULL; else if (REG_P (condconst)) { - basic_block pre_loop_bb = single_pred (loop_preheader_edge (loop)->src); - if (!pre_loop_bb) + basic_block preheader_b = loop_preheader_edge (loop)->src; + if (!single_pred_p (preheader_b)) return NULL; + basic_block pre_loop_bb = single_pred (preheader_b); rtx initial_compare = NULL_RTX; if (!(prev_nonnote_nondebug_insn_bb (BB_END (pre_loop_bb)) diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c index 2dc9c4f8bfe1..ca46bcb499a8 100644 --- a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c +++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c @@ -24,3 +24,15 @@ void n() { } } +int a; +void g2() { + long b; + while (a) { +char *c; +for (long d = b; d > 0; d -= 4) { + mve_pred16_t e = vctp32q(d); + int32x4_t f; + vstrbq_p_s32(c, f, e); +} + } +}
[gcc] Created branch 'ARM/heads/arm-14.3' in namespace 'refs/vendors'
The branch 'ARM/heads/arm-14.3' was created in namespace 'refs/vendors' pointing to: c6ee55bf5766... arm, mve: Detect uses of vctp_vpr_generated inside subregs
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Detect uses of vctp_vpr_generated inside subregs
https://gcc.gnu.org/g:c6ee55bf5766d1d38e57d92e3a757fde4722d55d commit c6ee55bf5766d1d38e57d92e3a757fde4722d55d Author: Andre Vieira Date: Fri Nov 29 10:18:57 2024 + arm, mve: Detect uses of vctp_vpr_generated inside subregs Address a problem we were having where we were missing on detecting uses of vctp_vpr_generated in the analysis for 'arm_attempt_dlstp_transform' because the use was inside a SUBREG and rtx_equal_p does not catch that. Using reg_overlap_mentioned_p is much more robust. gcc/ChangeLog: PR target/117814 * config/arm/arm.cc (arm_attempt_dlstp_transform): Use reg_overlap_mentioned_p instead of rtx_equal_p to detect uses of vctp_vpr_generated inside subregs. gcc/testsuite/ChangeLog: PR target/117814 * gcc.target/arm/mve/dlstp-invalid-asm.c (test10): Renamed to... (test10a): ... this. (test10b): Variation of test10a with a small change to trigger wrong codegen. (cherry picked from commit f42fd8e9335354f986d69b92ab66be07cc31bc7a) Diff: --- gcc/config/arm/arm.cc | 3 +- .../gcc.target/arm/mve/dlstp-invalid-asm.c | 37 -- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 1a43784daee2..e6d5c86a8bed 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -35915,7 +35915,8 @@ arm_attempt_dlstp_transform (rtx label) df_ref insn_uses = NULL; FOR_EACH_INSN_USE (insn_uses, insn) { - if (rtx_equal_p (vctp_vpr_generated, DF_REF_REG (insn_uses))) + if (reg_overlap_mentioned_p (vctp_vpr_generated, +DF_REF_REG (insn_uses))) { end_sequence (); return 1; diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c index 26df2d30523c..eb0782ebd0de 100644 --- a/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c +++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-invalid-asm.c @@ -127,8 +127,15 @@ void test9 (int32_t *a, int32_t *b, int32_t *c, int n) } } -/* Using a VPR that gets re-generated within the loop. 
*/ -void test10 (int32_t *a, int32_t *b, int32_t *c, int n) +/* Using a VPR that gets re-generated within the loop. Even though we + currently reject such loops, it would be possible to dlstp transform this + specific loop, as long as we make sure that the first vldrwq_z mask would + either: + * remain the same as its mask in the first iteration, + * become the same as the loop mask after the first iteration, + * become all ones, since the dlstp would then mask it the same as the loop + mask. */ +void test10a (int32_t *a, int32_t *b, int32_t *c, int n) { mve_pred16_t p = vctp32q (n); while (n > 0) @@ -145,6 +152,32 @@ void test10 (int32_t *a, int32_t *b, int32_t *c, int n) } } +/* Using a VPR that gets re-generated within the loop, the difference between + this test and test10a is to make sure the two vctp calls are never the same, + this leads to slightly different codegen in some cases triggering the issue + in a different way. This loop too would be OK to dlstp transform as long + as we made sure that the first vldrwq_z mask would either: + * remain the same as the its mask in the first iteration, + * become the same as the loop mask after the first iteration, + * become all ones, since the dlstp would then mask it the same as the loop + mask. */ +void test10b (int32_t *a, int32_t *b, int32_t *c, int n) +{ + mve_pred16_t p = vctp32q (n-4); + while (n > 0) +{ + int32x4_t va = vldrwq_z_s32 (a, p); + p = vctp32q (n); + int32x4_t vb = vldrwq_z_s32 (b, p); + int32x4_t vc = vaddq_x_s32 (va, vb, p); + vstrwq_p_s32 (c, vc, p); + c += 4; + a += 4; + b += 4; + n -= 4; +} +} + /* Using vctp32q_m instead of vctp32q. */ void test11 (int32_t *a, int32_t *b, int32_t *c, int n, mve_pred16_t p0) {
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm, mve: Fix scan-assembler for test7 in dlstp-compile-asm-2.c
https://gcc.gnu.org/g:653b1c38434272be4a1327efde6dd8f8b619d21c commit 653b1c38434272be4a1327efde6dd8f8b619d21c Author: Andre Vieira Date: Fri Nov 29 09:59:25 2024 +0000 arm, mve: Fix scan-assembler for test7 in dlstp-compile-asm-2.c After the changes to the vctp intrinsic, codegen changed slightly: we now unfortunately seem to be generating unneeded moves and extends of the mask. These are however not incorrect and we don't have a fix for the unneeded codegen right now, so this changes the testcase to accept them so we can catch other changes if they occur. gcc/testsuite/ChangeLog: PR target/117814 * gcc.target/arm/mve/dlstp-compile-asm-2.c (test7): Add an optional vmsr to the check-function-bodies. (cherry picked from commit cf75f86ed980308621ab0db0dc8adc9c72e39f5e) Diff: --- gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c index 84f4a2fc4f9b..2d282cb6645e 100644 --- a/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c +++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-compile-asm-2.c @@ -214,7 +214,12 @@ void test7 (int32_t *a, int32_t *b, int32_t *c, int n, int g) **... ** dlstp.32lr, r3 ** vldrw.32q[0-9]+, \[r0\], #16 +** ( +** vmsrp0, .* ** vpst +** | +** vpst +** ) ** vldrwt.32 q[0-9]+, \[r1\], #16 ** vadd.i32(q[0-9]+), q[0-9]+, q[0-9]+ ** vstrw.32\1, \[r2\], #16
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Fix testism with mve/ivopts-3.c testcase
https://gcc.gnu.org/g:7a83f663642eecd29d4b6b72c65cb92571411a47 commit 7a83f663642eecd29d4b6b72c65cb92571411a47 Author: Andre Vieira Date: Fri Aug 2 16:39:34 2024 +0100 arm: Fix testism with mve/ivopts-3.c testcase This patch ensures this testcase is run for armv8.1-m.main+mve as this is testing that doloops with function calls that aren't intrinsics get rejected as potential doloop targets during ivopts. For other targets this loop gets rejected for different reasons. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/ivopts-3.c: Add require target and options. (cherry picked from commit 995ac87a053c22364bcdc0bc041fd6e5b3087bc5) Diff: --- gcc/testsuite/gcc.target/arm/mve/ivopts-3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c index 19b2442ef12c..08879424501f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c +++ b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c @@ -1,5 +1,7 @@ /* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ /* { dg-options "-O2 -fdump-tree-ivopts-details" } */ +/* { dg-add-options arm_v8_1m_mve } */ void f2 (void);
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: make arm_predict_doloop_p reject loops with calls
https://gcc.gnu.org/g:3db8647388acfb981dfe1ea704ac1b8ae3a83e93 commit 3db8647388acfb981dfe1ea704ac1b8ae3a83e93 Author: Andre Vieira Date: Wed Jun 26 11:07:01 2024 +0100 arm: make arm_predict_doloop_p reject loops with calls With the introduction of low overhead loops we defined arm_predict_doloop_p. This is meant to be a lightweight check to rule out loops we are not considering for doloop optimization and it is used by other passes to prevent optimizations that may hurt the doloop optimization later on. The reason it is meant to be lightweight is that it is used by pre-RTL optimizations, meaning we can't do the same checks that doloop does. After the definition of arm_predict_doloop_p, when testing for armv8.1-m.main, tree-ssa/ivopts-3.c failed the scan-dump check as the dump now matched an extra '!= 0' introduced by: Doloop cmp iv use: if (ivtmp_1 != 0) Predict loop 1 can perform doloop optimization later. where previously we had: Predict doloop failure due to target specific checks. and after this patch: Predict doloop failure due to call in loop. Predict doloop failure due to target specific checks. Added a copy of the original tree-ssa/ivopts-3.c as a target specific test to check for the new dump message. gcc/ChangeLog: * config/arm/arm.cc (arm_predict_doloop_p): Reject loops with function calls that are not builtins. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/ivopts-3.c: New test. 
(cherry picked from commit ad20ad7dddcb052429346ae5f94b4a603925084a) Diff: --- gcc/config/arm/arm.cc | 16 gcc/testsuite/gcc.target/arm/mve/ivopts-3.c | 13 + 2 files changed, 29 insertions(+) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index bc5048a787ed..e296b0e8a33a 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -35613,6 +35613,22 @@ arm_predict_doloop_p (struct loop *loop) " loop bb complexity.\n"); return false; } + else +{ + gimple_stmt_iterator gsi = gsi_after_labels (loop->header); + while (!gsi_end_p (gsi)) + { + if (is_gimple_call (gsi_stmt (gsi)) + && !gimple_call_builtin_p (gsi_stmt (gsi))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Predict doloop failure due to" + " call in loop.\n"); + return false; + } + gsi_next (&gsi); + } +} return true; } diff --git a/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c new file mode 100644 index ..19b2442ef12c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/ivopts-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +void f2 (void); + +int main (void) +{ + int i; + for (i = 0; i < 10; i++) +f2 (); +} + +/* { dg-final { scan-tree-dump "Predict doloop failure due to call in loop." "ivopts" } } */
[gcc(refs/vendors/ARM/heads/arm-14.3)] arm: Prevent ICE when doloop dec_set is not PLUS expr
https://gcc.gnu.org/g:8d44f91e835e1d7e4dd99d5b900bd9c4c2d2586b commit 8d44f91e835e1d7e4dd99d5b900bd9c4c2d2586b Author: Andre Vieira Date: Tue Jul 16 17:47:51 2024 +0100 arm: Prevent ICE when doloop dec_set is not PLUS expr This patch refactors and fixes an issue where arm_mve_dlstp_check_dec_counter was making an assumption about the form of what a candidate for a dec_insn should be, which caused an ICE. This dec_insn is the instruction that decreases the loop counter inside a decrementing loop and we expect it to have the following form: (set (reg CONDCOUNT) (plus (reg CONDCOUNT) (const_int))) Where CONDCOUNT is the loop counter, and const int is the negative constant used to decrement it. This patch also improves our search for a valid dec_insn. Before this patch we'd only look for a dec_insn inside the loop header if the loop latch was empty. We now also search the loop header if the loop latch is not empty but the last instruction is not a valid dec_insn. This could potentially be improved to search all instructions inside the loop latch. gcc/ChangeLog: * config/arm/arm.cc (check_dec_insn): New helper function containing code hoisted from... (arm_mve_dlstp_check_dec_counter): ... here. Use check_dec_insn to check the validity of the candidate dec_insn. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/dlstp-loop-form.c: New test. (cherry picked from commit c78790988f7c428489292c5f9b3f80363f78f73d) Changed testcase to use include after cherry-picking. 
Diff: --- gcc/config/arm/arm.cc | 49 ++ gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c | 26 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index e296b0e8a33a..9185ce2c12ea 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -35314,6 +35314,32 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* vctp_insn, return vctp_insn; } +/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN + is of the expected form: + (set (reg a) (plus (reg a) (const_int))) + where (reg a) is the same as CONDCOUNT. + Return a rtx with the set if it is in the right format or NULL_RTX + otherwise. */ + +static rtx +check_dec_insn (rtx_insn *dec_insn, rtx condcount) +{ + if (!NONDEBUG_INSN_P (dec_insn)) +return NULL_RTX; + rtx dec_set = single_set (dec_insn); + if (!dec_set + || !REG_P (SET_DEST (dec_set)) + || GET_CODE (SET_SRC (dec_set)) != PLUS + || !REG_P (XEXP (SET_SRC (dec_set), 0)) + || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1)) + || REGNO (SET_DEST (dec_set)) + != REGNO (XEXP (SET_SRC (dec_set), 0)) + || REGNO (SET_DEST (dec_set)) != REGNO (condcount)) +return NULL_RTX; + + return dec_set; +} + /* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a counter that is decrementing, ensure that it is decrementing by the right amount in each iteration and that the target condition is what @@ -35330,30 +35356,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* vctp_insn, loop latch. Here we simply need to verify that this counter is the same reg that is also used in the vctp_insn and that it is not otherwise modified. */ - rtx_insn *dec_insn = BB_END (loop->latch); + rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount); /* If not in the loop latch, try to find the decrement in the loop header. 
*/ - if (!NONDEBUG_INSN_P (dec_insn)) + if (dec_set == NULL_RTX) { df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount)); /* If we haven't been able to find the decrement, bail out. */ if (!temp) return NULL; -dec_insn = DF_REF_INSN (temp); - } - - rtx dec_set = single_set (dec_insn); +dec_set = check_dec_insn (DF_REF_INSN (temp), condcount); - /* Next, ensure that it is a PLUS of the form: - (set (reg a) (plus (reg a) (const_int))) - where (reg a) is the same as condcount. */ - if (!dec_set - || !REG_P (SET_DEST (dec_set)) - || !REG_P (XEXP (SET_SRC (dec_set), 0)) - || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1)) - || REGNO (SET_DEST (dec_set)) - != REGNO (XEXP (SET_SRC (dec_set), 0)) - || REGNO (SET_DEST (dec_set)) != REGNO (condcount)) -return NULL; +if (dec_set == NULL_RTX) + return NULL; + } decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1)); diff --git a/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c new file mode 100644 index ..2dc9c4f8bfe1 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/dlstp-loop-form.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target a
[gcc/aoliva/heads/testme] [lra] catch all to-sp eliminations
The branch 'aoliva/heads/testme' was updated to point to: efab6fca... [lra] catch all to-sp eliminations It previously pointed to: 62b20bad36bd... [lra] catch all to-sp eliminations Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 62b20ba... [lra] catch all to-sp eliminations Summary of changes (added commits): --- efa... [lra] catch all to-sp eliminations (*) (*) This commit already exists in another branch. Because the reference `refs/users/aoliva/heads/testme' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc] Created branch 'aoliva/heads/lra-elim-fp2sp' in namespace 'refs/users'
The branch 'aoliva/heads/lra-elim-fp2sp' was created in namespace 'refs/users' pointing to: efab6fca... [lra] catch all to-sp eliminations
[gcc r16-1634] OpenACC: Add 'if' clause to 'acc wait' directive
https://gcc.gnu.org/g:2b077252cafa5045498a0e0c480ee6d48c136232 commit r16-1634-g2b077252cafa5045498a0e0c480ee6d48c136232 Author: Tobias Burnus Date: Mon Jun 23 23:24:56 2025 +0200 OpenACC: Add 'if' clause to 'acc wait' directive OpenACC 3.0 added the 'if' clause to four directives; this patch only adds it to 'acc wait'. gcc/c-family/ChangeLog: * c-omp.cc (c_finish_oacc_wait): Handle if clause. gcc/c/ChangeLog: * c-parser.cc (OACC_WAIT_CLAUSE_MASK): Add if clause. gcc/cp/ChangeLog: * parser.cc (OACC_WAIT_CLAUSE_MASK): Add if clause. gcc/fortran/ChangeLog: * openmp.cc (OACC_WAIT_CLAUSES): Add if clause. * trans-openmp.cc (gfc_trans_oacc_wait_directive): Handle it. gcc/testsuite/ChangeLog: * c-c++-common/goacc/acc-wait-1.c: New test. * gfortran.dg/goacc/acc-wait-1.f90: New test. Diff: --- gcc/c-family/c-omp.cc | 9 - gcc/c/c-parser.cc | 3 +- gcc/cp/parser.cc | 3 +- gcc/fortran/openmp.cc | 2 +- gcc/fortran/trans-openmp.cc| 4 ++ gcc/testsuite/c-c++-common/goacc/acc-wait-1.c | 51 ++ gcc/testsuite/gfortran.dg/goacc/acc-wait-1.f90 | 47 7 files changed, 114 insertions(+), 5 deletions(-) diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc index 13de2fe48f96..4352214df3b7 100644 --- a/gcc/c-family/c-omp.cc +++ b/gcc/c-family/c-omp.cc @@ -52,8 +52,8 @@ c_finish_oacc_wait (location_t loc, tree parms, tree clauses) vec_alloc (args, nparms + 2); stmt = builtin_decl_explicit (BUILT_IN_GOACC_WAIT); - if (omp_find_clause (clauses, OMP_CLAUSE_ASYNC)) -t = OMP_CLAUSE_ASYNC_EXPR (clauses); + if ((t = omp_find_clause (clauses, OMP_CLAUSE_ASYNC))) +t = OMP_CLAUSE_ASYNC_EXPR (t); else t = build_int_cst (integer_type_node, GOMP_ASYNC_SYNC); @@ -71,6 +71,11 @@ c_finish_oacc_wait (location_t loc, tree parms, tree clauses) stmt = build_call_expr_loc_vec (loc, stmt, args); + t = omp_find_clause (clauses, OMP_CLAUSE_IF); + if (t) +stmt = build3_loc (input_location, COND_EXPR, void_type_node, + OMP_CLAUSE_IF_EXPR (t), stmt, NULL_TREE); + vec_free (args); return stmt; diff --git 
a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index faa50a4fd86b..0c3e3e2889c6 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -22501,7 +22501,8 @@ c_parser_oacc_update (c_parser *parser) */ #define OACC_WAIT_CLAUSE_MASK \ - ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) ) + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF) ) static tree c_parser_oacc_wait (location_t loc, c_parser *parser, char *p_name) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index cfebde8b1181..80fd7990 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -49704,7 +49704,8 @@ cp_parser_oacc_update (cp_parser *parser, cp_token *pragma_tok) */ #define OACC_WAIT_CLAUSE_MASK \ - ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC)) + ( (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_ASYNC) \ + | (OMP_CLAUSE_MASK_1 << PRAGMA_OACC_CLAUSE_IF)) static tree cp_parser_oacc_wait (cp_parser *parser, cp_token *pragma_tok) diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index df829403c34f..fe0a47a6948b 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -4474,7 +4474,7 @@ error: | OMP_CLAUSE_COPYOUT | OMP_CLAUSE_DELETE | OMP_CLAUSE_FINALIZE\ | OMP_CLAUSE_DETACH) #define OACC_WAIT_CLAUSES \ - omp_mask (OMP_CLAUSE_ASYNC) + omp_mask (OMP_CLAUSE_ASYNC) | OMP_CLAUSE_IF #define OACC_ROUTINE_CLAUSES \ (omp_mask (OMP_CLAUSE_GANG) | OMP_CLAUSE_WORKER | OMP_CLAUSE_VECTOR\ | OMP_CLAUSE_SEQ \ diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 2a48d4af5276..a2e70fca0b37 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -6048,6 +6048,10 @@ gfc_trans_oacc_wait_directive (gfc_code *code) args->quick_push (gfc_convert_expr_to_tree (&block, el->expr)); stmt = build_call_expr_loc_vec (loc, stmt, args); + if (clauses->if_expr) +stmt = build3_loc (input_location, COND_EXPR, void_type_node, + gfc_convert_expr_to_tree (&block, clauses->if_expr), + stmt, NULL_TREE); gfc_add_expr_to_block (&block, 
stmt); vec_free (args); diff --git a/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c b/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c new file mode 100644 index ..bc7ff022f173 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/acc-wait-1.c @@ -
[gcc r16-1633] Fortran: fix checking of renamed-on-use interface name [PR120784]
https://gcc.gnu.org/g:6dd1659cf10a7ad51576f902ef3bc007db30c990 commit r16-1633-g6dd1659cf10a7ad51576f902ef3bc007db30c990 Author: Harald Anlauf Date: Mon Jun 23 21:33:40 2025 +0200 Fortran: fix checking of renamed-on-use interface name [PR120784] PR fortran/120784 gcc/fortran/ChangeLog: * interface.cc (gfc_match_end_interface): If a use-associated symbol is renamed, use the local_name for checking. gcc/testsuite/ChangeLog: * gfortran.dg/interface_63.f90: New test. Diff: --- gcc/fortran/interface.cc | 13 +-- gcc/testsuite/gfortran.dg/interface_63.f90 | 62 ++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/gcc/fortran/interface.cc b/gcc/fortran/interface.cc index b8542920ce79..cdb838d83368 100644 --- a/gcc/fortran/interface.cc +++ b/gcc/fortran/interface.cc @@ -452,11 +452,18 @@ gfc_match_end_interface (void) case INTERFACE_DTIO: case INTERFACE_GENERIC: + /* If a use-associated symbol is renamed, check the local_name. */ + const char *local_name = current_interface.sym->name; + + if (current_interface.sym->attr.use_assoc + && current_interface.sym->attr.use_rename + && current_interface.sym->ns->use_stmts->rename) + local_name = current_interface.sym->ns->use_stmts->rename->local_name; + if (type != current_interface.type - || strcmp (current_interface.sym->name, name) != 0) + || strcmp (local_name, name) != 0) { - gfc_error ("Expecting %<END INTERFACE %s%> at %C", -current_interface.sym->name); + gfc_error ("Expecting %<END INTERFACE %s%> at %C", local_name); m = MATCH_ERROR; } diff --git a/gcc/testsuite/gfortran.dg/interface_63.f90 b/gcc/testsuite/gfortran.dg/interface_63.f90 new file mode 100644 index ..a55e8ab431b1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/interface_63.f90 @@ -0,0 +1,62 @@ +! { dg-do compile } +! PR fortran/120784 - fix checking of renamed-on-use interface name +! +! 
Contributed by Matt Thompson + +module A_mod + implicit none + + interface Get + procedure :: get_1 + procedure :: get_2 + end interface Get + +contains + + subroutine get_1(i) +integer :: i +i = 5 + end subroutine get_1 + + subroutine get_2(x) +real :: x +x = 4 + end subroutine get_2 +end module A_mod + +module B_mod + use A_mod, only : MyGet => Get + implicit none + + interface MyGet + procedure :: other_get + end interface MyGet + +contains + + subroutine other_get(c) +character(1) :: c +c = 'a' + end subroutine other_get + + subroutine check_get () +character :: c +integer :: i +real :: r +call myget (c) +call myget (i) +call myget (r) + end subroutine check_get + +end module B_MOD + +program p + use b_mod, only: myget + implicit none + character :: c + integer :: i + real :: r + call myget (c) + call myget (i) + call myget (r) +end
[gcc r16-1635] analyzer: fix missing "final override"
https://gcc.gnu.org/g:e6406aefd1a25b6dba845a52cfd9484188ff5720 commit r16-1635-ge6406aefd1a25b6dba845a52cfd9484188ff5720 Author: David Malcolm Date: Mon Jun 23 18:46:44 2025 -0400 analyzer: fix missing "final override" No functional change intended. gcc/analyzer/ChangeLog: * region-model.cc (exception_thrown_from_unrecognized_call::print): Add "final override" to vfunc. Signed-off-by: David Malcolm Diff: --- gcc/analyzer/region-model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index d1c7e8cd53ea..bc44d3da44b5 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -2090,7 +2090,7 @@ public: { } - void print (pretty_printer *pp) const + void print (pretty_printer *pp) const final override { if (m_fndecl) pp_printf (pp, "if %qD throws an exception...", m_fndecl);
[gcc r16-1636] libgdiagnostics: sarif-replay: add extra sinks via -fdiagnostics-add-output= [PR116792, PR116163]
https://gcc.gnu.org/g:d0142e147486e6f319704d35930720f6dec648fb commit r16-1636-gd0142e147486e6f319704d35930720f6dec648fb Author: David Malcolm Date: Mon Jun 23 18:46:51 2025 -0400 libgdiagnostics: sarif-replay: add extra sinks via -fdiagnostics-add-output= [PR116792,PR116163] This patch refactors the support for -fdiagnostics-add-output=SCHEME from GCC's options parsing so that it is also available to sarif-replay and to other clients of libgdiagnostics. With this users of sarif-replay and other such tools can generate HTML or SARIF as well as text output, using the same -fdiagnostics-add-output=SCHEME as GCC. As a test, the patch adds support for this option to the dg-lint script below "contrib". For example dg-lint can now generate text, html, and sarif output via: LD_LIBRARY_PATH=../build/gcc/ \ ./contrib/dg-lint/dg-lint \ contrib/dg-lint/test-*.c \ -fdiagnostics-add-output=experimental-html:file=dg-lint-tests.html \ -fdiagnostics-add-output=sarif:file=dg-lint-tests.sarif where the HTML output from dg-lint can be seen here: https://dmalcolm.fedorapeople.org/gcc/2025-06-20/dg-lint-tests.html the sarif output here: https://dmalcolm.fedorapeople.org/gcc/2025-06-23/dg-lint-tests.sarif and a screenshot of VS Code viewing the sarif output is here: https://dmalcolm.fedorapeople.org/gcc/2025-06-23/vscode-viewing-dg-lint-sarif-output.png As well as allowing sarif-replay to generate HTML, this patch allows sarif-replay to also generate SARIF. Ideally this would faithfully round-trip all the data, but it's not perfect (which I'm tracking as PR sarif-replay/120792). contrib/ChangeLog: PR other/116792 PR testsuite/116163 PR sarif-replay/120792 * dg-lint/dg-lint: Add -fdiagnostics-add-output. * dg-lint/libgdiagnostics.py: Add diagnostic_manager_add_sink_from_spec. (Manager.add_sink_from_spec): New. gcc/ChangeLog: PR other/116792 PR testsuite/116163 PR sarif-replay/120792 * Makefile.in (OBJS-libcommon): Add diagnostic-output-spec.o. 
* diagnostic-format-html.cc (html_builder::html_builder): Ensure title is non-empty. * diagnostic-output-spec.cc: New file, taken from material in opts-diagnostic.cc. * diagnostic-output-spec.h: New file. * diagnostic.cc (diagnostic_context::set_main_input_filename): New. * diagnostic.h (diagnostic_context::set_main_input_filename): New decl. * doc/libgdiagnostics/topics/compatibility.rst (LIBGDIAGNOSTICS_ABI_2): New. * doc/libgdiagnostics/topics/diagnostic-manager.rst (diagnostic_manager_add_sink_from_spec): New. (diagnostic_manager_set_analysis_target): New. * libgdiagnostics++.h (manager::add_sink_from_spec): New. (manager::set_analysis_target): New. * libgdiagnostics.cc: Include "diagnostic-output-spec.h". (struct spec_context): New. (diagnostic_manager_add_sink_from_spec): New. (diagnostic_manager_set_analysis_target): New. * libgdiagnostics.h (LIBDIAGNOSTICS_HAVE_diagnostic_manager_add_sink_from_spec): New define. (diagnostic_manager_add_sink_from_spec): New decl. (LIBDIAGNOSTICS_HAVE_diagnostic_manager_set_analysis_target): New define. (diagnostic_manager_set_analysis_target): New decl. * libgdiagnostics.map (LIBGDIAGNOSTICS_ABI_2): New. * libsarifreplay.cc (sarif_replayer::handle_artifact_obj): Looks for "analysisTarget" in roles and call set_analysis_target using the artifact if found. * opts-diagnostic.cc: Refactor, moving material to diagnostic-output-spec.cc. (struct opt_spec_context): New. (handle_OPT_fdiagnostics_add_output_): Use opt_spec_context. (handle_OPT_fdiagnostics_set_output_): Likewise. * sarif-replay.cc: Define INCLUDE_STRING. (struct options): Add m_extra_output_specs. (usage_msg): Add -fdiagnostics-add-output=SCHEME. (str_starts_with): New. (parse_options): Add -fdiagnostics-add-output=SCHEME. (main): Likewise. * selftest-run-tests.cc (selftest::run_tests): Call diagnostic_output_spec_cc_tests rather than opts_diagnostic_cc_tests. * selftest.h (selftest::diagnostic_output_spec_cc_tests): Replace... 
(selftest::opts_diagnostic_cc_tests): ...this. gcc/testsuite/ChangeLog: PR other/116792 PR testsuite/116163 PR sarif-replay/120792
[gcc r16-1644] x86: Extend the remove_redundant_vector pass
https://gcc.gnu.org/g:aba3b9d3a48a0703fd565f7c5f0caf604f59970b commit r16-1644-gaba3b9d3a48a0703fd565f7c5f0caf604f59970b Author: H.J. Lu Date: Fri May 9 07:17:07 2025 +0800 x86: Extend the remove_redundant_vector pass Extend the remove_redundant_vector pass to handle vector broadcasts from constant and variable scalars. When broadcasting from constants and function arguments, we can place a single widest vector broadcast at entry of the nearest common dominator for basic blocks with all uses since constants and function arguments aren't changed. For broadcast from variables with a single definition, the single definition is replaced with the widest broadcast. gcc/ PR target/92080 * config/i386/i386-expand.cc (ix86_expand_call): Set recursive_function to true for recursive call. * config/i386/i386-features.cc (ix86_place_single_vector_set): Add an argument for inner scalar, default to nullptr. Set the source from inner scalar if not nullptr. (ix86_get_vector_load_mode): Renamed to ... (ix86_get_vector_cse_mode): This. Add an argument for scalar mode and handle integer and float scalar modes. (replace_vector_const): Add an argument for scalar mode and pass it to ix86_get_vector_load_mode. (x86_cse_kind): New. (redundant_load): Likewise. (ix86_broadcast_inner): Likewise. (remove_redundant_vector_load): Also support const0_rtx and constm1_rtx broadcasts. Handle vector broadcasts from constant and variable scalars. * config/i386/i386.h (machine_function): Add recursive_function. gcc/testsuite/ * gcc.target/i386/keylocker-aesdecwide128kl.c: Updated to expect movdqa instead of pxor. * gcc.target/i386/keylocker-aesdecwide256kl.c: Likewise. * gcc.target/i386/keylocker-aesencwide128kl.c: Likewise. * gcc.target/i386/keylocker-aesencwide256kl.c: Likewise. * gcc.target/i386/pr92080-4.c: New test. * gcc.target/i386/pr92080-5.c: Likewise. * gcc.target/i386/pr92080-6.c: Likewise. * gcc.target/i386/pr92080-7.c: Likewise. * gcc.target/i386/pr92080-8.c: Likewise. 
* gcc.target/i386/pr92080-9.c: Likewise. * gcc.target/i386/pr92080-10.c: Likewise. * gcc.target/i386/pr92080-11.c: Likewise. * gcc.target/i386/pr92080-12.c: Likewise. * gcc.target/i386/pr92080-13.c: Likewise. * gcc.target/i386/pr92080-14.c: Likewise. * gcc.target/i386/pr92080-15.c: Likewise. * gcc.target/i386/pr92080-16.c: Likewise. * gcc.target/i386/pr92080-17.c: Likewise. * gcc.target/i386/pr92080-18.c: Likewise. * gcc.target/i386/pr92080-19.c: Likewise. * gcc.target/i386/pr92080-20.c: Likewise. Signed-off-by: H.J. Lu Diff: --- gcc/config/i386/i386-expand.cc | 3 + gcc/config/i386/i386-features.cc | 427 - gcc/config/i386/i386.h | 3 + .../gcc.target/i386/keylocker-aesdecwide128kl.c| 14 +- .../gcc.target/i386/keylocker-aesdecwide256kl.c| 14 +- .../gcc.target/i386/keylocker-aesencwide128kl.c| 14 +- .../gcc.target/i386/keylocker-aesencwide256kl.c| 14 +- gcc/testsuite/gcc.target/i386/pr92080-10.c | 13 + gcc/testsuite/gcc.target/i386/pr92080-11.c | 33 ++ gcc/testsuite/gcc.target/i386/pr92080-12.c | 16 + gcc/testsuite/gcc.target/i386/pr92080-13.c | 32 ++ gcc/testsuite/gcc.target/i386/pr92080-14.c | 31 ++ gcc/testsuite/gcc.target/i386/pr92080-15.c | 25 ++ gcc/testsuite/gcc.target/i386/pr92080-16.c | 26 ++ gcc/testsuite/gcc.target/i386/pr92080-17.c | 40 ++ gcc/testsuite/gcc.target/i386/pr92080-18.c | 19 + gcc/testsuite/gcc.target/i386/pr92080-19.c | 20 + gcc/testsuite/gcc.target/i386/pr92080-20.c | 20 + gcc/testsuite/gcc.target/i386/pr92080-4.c | 50 +++ gcc/testsuite/gcc.target/i386/pr92080-5.c | 109 ++ gcc/testsuite/gcc.target/i386/pr92080-6.c | 19 + gcc/testsuite/gcc.target/i386/pr92080-7.c | 20 + gcc/testsuite/gcc.target/i386/pr92080-8.c | 16 + gcc/testsuite/gcc.target/i386/pr92080-9.c | 81 24 files changed, 939 insertions(+), 120 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 423fc632003d..8e556f1b9c18 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -10141,6 +10141,9 @@ 
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, else if (lookup_attribute ("no_callee_saved_registers", TYPE_ATTRIB
[gcc r16-1645] middle-end: replace log_vf usages with vf to allow support for non-power of two vf
https://gcc.gnu.org/g:309dbcea2cabb31bde1a65cdfd30bb7f87b170a2 commit r16-1645-g309dbcea2cabb31bde1a65cdfd30bb7f87b170a2 Author: Tamar Christina Date: Tue Jun 24 07:13:22 2025 +0100 middle-end: replace log_vf usages with vf to allow support for non-power of two vf This patch fixes a bug where the current code assumed that exact_log2 returns NULL on failure, but it instead returns -1. So there are some cases where the right shift could shift out the entire value. Secondly it also removes the requirement that VF be a power of two. With an uneven unroll factor we can easily end up with a non-power of two VF which SLP can handle. This replaces shifts with multiplication and division. The 32-bit x86 testcase from PR64110 was always wrong, it used to match by pure coincidence a vmovd inside the vector loop. What it intended to match was that the argument to the function isn't spilled and then reloaded from the stack for no reason. But on 32-bit x86 all arguments are passed on the stack anyway and so the match would have never worked. The patch seems to simplify the loop preheader which gets it to remove an intermediate zero extend which causes the match to now properly fail. As such I'm skipping the test on 32-bit x86. gcc/ChangeLog: * tree-vect-loop-manip.cc (vect_gen_vector_loop_niters, vect_gen_vector_loop_niters_mult_vf): Remove uses of log_vf. gcc/testsuite/ChangeLog: * gcc.target/i386/pr64110.c: Update testcase. 
Diff: --- gcc/testsuite/gcc.target/i386/pr64110.c | 2 +- gcc/tree-vect-loop-manip.cc | 36 + 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr64110.c b/gcc/testsuite/gcc.target/i386/pr64110.c index 99e391916cb7..11a6929835f4 100644 --- a/gcc/testsuite/gcc.target/i386/pr64110.c +++ b/gcc/testsuite/gcc.target/i386/pr64110.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O3 -march=core-avx2" } */ -/* { dg-final { scan-assembler "vmovd\[\\t \]" } } */ +/* { dg-final { scan-assembler "vmovd\[\\t \]" { target { ! ilp32 } } } } */ int foo (void); int a; diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 20dc0e556527..469694377499 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -2794,7 +2794,6 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, tree niters_vector, step_vector, type = TREE_TYPE (niters); poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); - tree log_vf = NULL_TREE; /* If epilogue loop is required because of data accesses with gaps, we subtract one iteration from the total number of iterations here for @@ -2820,22 +2819,25 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, if (vf.is_constant (&const_vf) && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { - /* Create: niters >> log2(vf) */ + /* Create: niters / vf, which is equivalent to niters >> log2(vf) when +vf is a power of two, and when not we approximate using a +truncating division. */ /* If it's known that niters == number of latch executions + 1 doesn't -overflow, we can generate niters >> log2(vf); otherwise we generate -(niters - vf) >> log2(vf) + 1 by using the fact that we know ratio +overflow, we can generate niters / vf; otherwise we generate +(niters - vf) / vf + 1 by using the fact that we know ratio will be at least one. 
*/ - log_vf = build_int_cst (type, exact_log2 (const_vf)); + tree var_vf = build_int_cst (type, const_vf); if (niters_no_overflow) - niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf); + niters_vector = fold_build2 (TRUNC_DIV_EXPR, type, ni_minus_gap, +var_vf); else niters_vector = fold_build2 (PLUS_EXPR, type, -fold_build2 (RSHIFT_EXPR, type, +fold_build2 (TRUNC_DIV_EXPR, type, fold_build2 (MINUS_EXPR, type, ni_minus_gap, - build_int_cst (type, vf)), - log_vf), + var_vf), + var_vf), build_int_cst (type, 1)); step_vector = build_one_cst (type); } @@ -2854,16 +2856,17 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, /* Peeling algorithm guarantees that vector loop bound is at least ONE, we set range information to make niters analyzer's life easier. N
[gcc r16-1646] middle-end: Apply loop->unroll directly in vectorizer
https://gcc.gnu.org/g:7f87bfa4a7302ce663db51fb073a40045052cc11 commit r16-1646-g7f87bfa4a7302ce663db51fb073a40045052cc11 Author: Tamar Christina Date: Tue Jun 24 07:14:27 2025 +0100 middle-end: Apply loop->unroll directly in vectorizer Consider the loop void f1 (int *restrict a, int n) { #pragma GCC unroll 4 requested for (int i = 0; i < n; i++) a[i] *= 2; } Which today is vectorized and then unrolled 3x by the RTL unroller due to the use of the pragma. This is unfortunate because the pragma was intended for the scalar loop but we end up with an unrolled vector loop and a longer path to the entry which has a low enough VF requirement to enter. This patch instead seeds the suggested_unroll_factor with the value the user requested and instead uses it to maintain the total VF that the user wanted the scalar loop to maintain. In effect it applies the unrolling inside the vector loop itself. This has the benefits for things like reductions, as it allows us to split the accumulator and so the unrolled loop is more efficient. For early-break it allows the cbranch call to be shared between the unrolled elements, giving you more effective unrolling because it doesn't need the repeated cbranch which can be expensive. The target can then choose to create multiple epilogues to deal with the "rest". The example above now generates: .L4: ldr q31, [x2] add v31.4s, v31.4s, v31.4s str q31, [x2], 16 cmp x2, x3 bne .L4 as V4SI maintains the requested VF, but e.g. pragma unroll 8 generates: .L4: ldp q30, q31, [x2] add v30.4s, v30.4s, v30.4s add v31.4s, v31.4s, v31.4s stp q30, q31, [x2], 32 cmp x3, x2 bne .L4 gcc/ChangeLog: * doc/extend.texi: Document pragma unroll interaction with vectorizer. * tree-vectorizer.h (LOOP_VINFO_USER_UNROLL): New. (class _loop_vec_info): Add user_unroll. * tree-vect-loop.cc (vect_analyze_loop_1): Set suggested_unroll_factor and retry. (_loop_vec_info::_loop_vec_info): Initialize user_unroll. 
(vect_transform_loop): Clear the loop->unroll value if the pragma was used. gcc/testsuite/ChangeLog: * gcc.target/aarch64/unroll-vect.c: New test. Diff: --- gcc/doc/extend.texi| 5 ++ gcc/testsuite/gcc.target/aarch64/unroll-vect.c | 20 gcc/tree-vect-loop.cc | 63 +++--- gcc/tree-vectorizer.h | 5 ++ 4 files changed, 77 insertions(+), 16 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 69c651207464..7da99f77ec82 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -10382,6 +10382,11 @@ loop or a @code{#pragma GCC ivdep}, and applies only to the loop that follows. @var{n} is an integer constant expression specifying the unrolling factor. The values of @math{0} and @math{1} block any unrolling of the loop. +If the loop was vectorized the unroll factor specified will be used to seed the +vectorizer unroll factor. Whether the loop is unrolled or not will be +determined by target costing. The resulting vectorized loop may still be +unrolled more in later passes depending on the target costing. + @end table @node Thread-Local diff --git a/gcc/testsuite/gcc.target/aarch64/unroll-vect.c b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c new file mode 100644 index ..3cb774ba9578 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/unroll-vect.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv8-a --param aarch64-autovec-preference=asimd-only -std=gnu99" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +/* +** f1: +** ... +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** add v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** ... 
+*/ +void f1 (int *restrict a, int n) +{ +#pragma GCC unroll 16 + for (int i = 0; i < n; i++) +a[i] *= 2; +} + diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index eb2eb8b1fc08..9ee8e50ee75a 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1074,6 +1074,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) peeling_for_gaps (false), peeling_for_niter (false), early_breaks (false), +user_unroll (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -3429,27 +3430,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared
[gcc r16-1632] contrib: handle GDB's 'unexpected core files' count
https://gcc.gnu.org/g:4e9104ae5455a3c02c2a7e07f52e6bc574cc761d commit r16-1632-g4e9104ae5455a3c02c2a7e07f52e6bc574cc761d Author: Andrew Burgess Date: Mon Jun 23 16:17:19 2025 +0100 contrib: handle GDB's 'unexpected core files' count This commit is for the benefit of GDB, but as the binutils-gdb repository shares the contrib/ directory with gcc, this commit must first be applied to gcc then copied back to binutils-gdb. This commit extends the two scripts contrib/dg-extract-results.{py,sh} to handle GDB's 'unexpected core files' count. This test result type should never appear in GCC, or any other tool that shares the contrib/ directory, so this change should be harmless for others. The 'unexpected core files' count was added to GDB's results by this series: https://inbox.sourceware.org/gdb-patches/20220623183053.172430-1-pe...@palves.net this count is added to the gdb.sum file after all the tests have run, and counts up any core.* files that have appeared. GDB also has a make-check-all.sh script which runs a test with all the different board files that GDB supports. After each test is run the 'unexpected core files' count will be added to that board's results. I'm now trying to use the dg-extract-results.* scripts to merge the results from all the different board files, and the 'unexpected core files' count is confusing these scripts. contrib/ChangeLog: * dg-extract-results.py: Handle GDB's unexpected core file count. * dg-extract-results.sh: Likewise. 
Diff: --- contrib/dg-extract-results.py | 3 ++- contrib/dg-extract-results.sh | 8 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/contrib/dg-extract-results.py b/contrib/dg-extract-results.py index f539275ba03c..c5bfbcaa0202 100644 --- a/contrib/dg-extract-results.py +++ b/contrib/dg-extract-results.py @@ -146,7 +146,8 @@ class Prog: '# of unresolved testcases\t', '# of unsupported tests\t\t', '# of paths in test names\t', -'# of duplicate test names\t' +'# of duplicate test names\t', +'# of unexpected core files\t' ] self.runs = dict() diff --git a/contrib/dg-extract-results.sh b/contrib/dg-extract-results.sh index c2f760498da4..d64ba2558388 100755 --- a/contrib/dg-extract-results.sh +++ b/contrib/dg-extract-results.sh @@ -403,7 +403,7 @@ BEGIN { variant="$VAR" tool="$TOOL" passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0; - pathcnt=0; dupcnt=0 + pathcnt=0; dupcnt=0; corecnt=0 curvar=""; insummary=0 } /^Running target / { curvar = \$3; next } @@ -420,6 +420,7 @@ BEGIN { /^# of unsupported tests/ { if (insummary == 1) unsupcnt += \$5; next; } /^# of paths in test names/{ if (insummary == 1) pathcnt += \$7; next; } /^# of duplicate test names/ { if (insummary == 1) dupcnt += \$6; next; } +/^# of unexpected core files/ { if (insummary == 1) corecnt += \$6; next; } /^$/ { if (insummary == 1) { insummary = 0; curvar = "" } next @@ -439,6 +440,7 @@ END { if (unsupcnt != 0) printf ("# of unsupported tests\t\t%d\n", unsupcnt) if (pathcnt != 0) printf ("# of paths in test names\t%d\n", pathcnt) if (dupcnt != 0) printf ("# of duplicate test names\t%d\n", dupcnt) + if (corecnt != 0) printf ("# of unexpected core files\t%d\n", corecnt) } EOF @@ -460,7 +462,7 @@ cat << EOF > $TOTAL_AWK BEGIN { tool="$TOOL" passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0 - pathcnt=0; dupcnt=0 + pathcnt=0; dupcnt=0; corecnt=0 } /^# of DejaGnu errors/ 
{ dgerrorcnt += \$5 } /^# of expected passes/{ passcnt += \$5 } @@ -474,6 +476,7 @@ BEGIN { /^# of unsupported tests/ { unsupcnt += \$5 } /^# of paths in test names/{ pathcnt += \$7 } /^# of duplicate test names/ { dupcnt += \$6 } +/^# of unexpected core files/ { corecnt += \$6 } END { printf ("\n\t\t=== %s Summary ===\n\n", tool) if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt) @@ -488,6 +491,7 @@ END { if (unsupcnt != 0) printf ("# of unsupported tests\t\t%d\n", unsupcnt) if (pathcnt != 0) printf ("# of paths in test names\t%d\n", pathcnt) if (dupcnt != 0) printf ("# of duplicate test names\t%d\n", dupcnt) + if (corecnt != 0) printf ("# of unexpected core files\t%d\n", corecnt) } EOF
[gcc(refs/users/aoliva/heads/testme)] [lra] catch all to-sp eliminations
https://gcc.gnu.org/g:62b20bad36bd3fcb34d6c2cea71ee10abd686e08 commit 62b20bad36bd3fcb34d6c2cea71ee10abd686e08 Author: Alexandre Oliva Date: Sun Jun 22 17:34:54 2025 -0300 [lra] catch all to-sp eliminations An x86_64-linux-gnu native with ix86_frame_pointer_required modified to return true for nonzero frames, to exercise lra_update_fp2sp_elimination, reveals in stage1 testing that wrong code is generated for gcc.c-torture/execute/ieee/fp-cmp-8l.c: argp-to-sp eliminations are used for one_test to pass its arguments on to *pos, and the sp offsets survive the disabling of that elimination. We didn't really have to disable that elimination, but the backend disables eliminations to sp if frame_pointer_needed. The workaround for this scenario is to compile with -maccumulate-outgoing-args. This change extends the catching of fp2sp eliminations to all (?) eliminations to sp, since none of them can be properly reversed and would silently lead to wrong code. This is probably too strict. for gcc/ChangeLog PR rtl-optimization/120424 * lra-eliminations.cc (elimination_2sp_occurred_p): Rename from... (elimination_fp2sp_occured_p): ... this. Adjust all uses. (lra_eliminate_regs_1): Don't require a from-frame-pointer elimination to set it. (update_reg_eliminate): Likewise to test it. Diff: --- gcc/lra-eliminations.cc | 46 +- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc index 9cdd0c5ff53a..341587f21f2e 100644 --- a/gcc/lra-eliminations.cc +++ b/gcc/lra-eliminations.cc @@ -309,8 +309,18 @@ move_plus_up (rtx x) return x; } -/* Flag that we already did frame pointer to stack pointer elimination. */ -static bool elimination_fp2sp_occured_p = false; +/* Flag that we already applied nonzero stack pointer elimination + offset; such sp updates cannot currently be undone. */ +static bool elimination_2sp_occurred_p = false; + +/* Take note of any nonzero sp-OFFSET used in eliminations to sp. 
*/ +static inline poly_int64 +note_spoff (poly_int64 offset) +{ + if (maybe_ne (offset)) +elimination_2sp_occurred_p = true; + return offset; +} /* Scan X and replace any eliminable registers (such as fp) with a replacement (such as sp) if SUBST_P, plus an offset. The offset is @@ -369,13 +379,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, { rtx to = subst_p ? ep->to_rtx : ep->from_rtx; - if (ep->to_rtx == stack_pointer_rtx && ep->from == FRAME_POINTER_REGNUM) - elimination_fp2sp_occured_p = true; - if (maybe_ne (update_sp_offset, 0)) { if (ep->to_rtx == stack_pointer_rtx) - return plus_constant (Pmode, to, update_sp_offset); + return plus_constant (Pmode, to, note_spoff (update_sp_offset)); return to; } else if (update_p) @@ -385,7 +392,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, ep->offset - (insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx -? lra_get_insn_recog_data (insn)->sp_offset +? note_spoff (lra_get_insn_recog_data + (insn)->sp_offset) : 0)); else return to; @@ -402,19 +410,18 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, poly_int64 offset, curr_offset; rtx to = subst_p ? ep->to_rtx : ep->from_rtx; - if (ep->to_rtx == stack_pointer_rtx && ep->from == FRAME_POINTER_REGNUM) - elimination_fp2sp_occured_p = true; - if (! update_p && ! full_p) return simplify_gen_binary (PLUS, Pmode, to, XEXP (x, 1)); if (maybe_ne (update_sp_offset, 0)) - offset = ep->to_rtx == stack_pointer_rtx ? update_sp_offset : 0; + offset = (ep->to_rtx == stack_pointer_rtx + ? note_spoff (update_sp_offset) + : 0); else offset = (update_p ? 
ep->offset - ep->previous_offset : ep->offset); if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx) - offset -= lra_get_insn_recog_data (insn)->sp_offset; + offset -= note_spoff (lra_get_insn_recog_data (insn)->sp_offset); if (poly_int_rtx_p (XEXP (x, 1), &curr_offset) && known_eq (curr_offset, -offset)) return to; @@ -465,15 +472,13 @@ lra_eliminate_regs_1 (rtx_insn *insn, r
[gcc/aoliva/heads/testme] [lra] catch all to-sp eliminations
The branch 'aoliva/heads/testme' was updated to point to: 62b20bad36bd... [lra] catch all to-sp eliminations It previously pointed to: 87076bd78202... [lra] catch all to-sp eliminations Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 87076bd... [lra] catch all to-sp eliminations Summary of changes (added commits): --- 62b20ba... [lra] catch all to-sp eliminations