[gcc r16-1669] rtl-ssa: Rewrite process_uses_of_deleted_def [PR120745]
https://gcc.gnu.org/g:76f7f91de08de49f39c612bdc9a44a6a8b45325f commit r16-1669-g76f7f91de08de49f39c612bdc9a44a6a8b45325f Author: Richard Sandiford Date: Wed Jun 25 10:44:34 2025 +0100 rtl-ssa: Rewrite process_uses_of_deleted_def [PR120745] process_uses_of_deleted_def seems to have been written on the assumption that non-degenerate phis would be explicitly deleted by an insn_change, and that the function therefore only needed to delete degenerate phis. But that was inconsistent with the rest of the code, and wouldn't be very convenient in any case. This patch therefore rewrites process_uses_of_deleted_def to handle general phis. I'm not aware that this fixes any issues in current code, but it is needed to enable the rtl-ssa dce work that Ondřej and Honza are working on. gcc/ PR rtl-optimization/120745 * rtl-ssa/changes.cc (process_uses_of_deleted_def): Rewrite to handle deletions of non-degenerate phis. Diff: --- gcc/rtl-ssa/changes.cc | 36 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc index f7aa6a66cdf5..00e6c3185644 100644 --- a/gcc/rtl-ssa/changes.cc +++ b/gcc/rtl-ssa/changes.cc @@ -258,28 +258,40 @@ rtl_ssa::changes_are_worthwhile (array_slice changes, void function_info::process_uses_of_deleted_def (set_info *set) { - if (!set->has_any_uses ()) -return; - - auto *use = *set->all_uses ().begin (); - do + // Each member of the worklist is either SET or a dead phi. + auto_vec worklist; + worklist.quick_push (set); + while (!worklist.is_empty ()) { - auto *next_use = use->next_use (); + auto *this_set = worklist.pop (); + auto *use = this_set->first_use (); + if (!use) + { + if (this_set != set) + delete_phi (as_a (this_set)); + continue; + } if (use->is_in_phi ()) { - // This call will not recurse. - process_uses_of_deleted_def (use->phi ()); - delete_phi (use->phi ()); + // Removing all uses from the phi ensures that we'll only add + // the phi to the worklist once. 
+ auto *phi = use->phi (); + for (auto *input : phi->inputs ()) + { + remove_use (input); + input->set_def (nullptr); + } + worklist.safe_push (phi); } else { gcc_assert (use->is_live_out_use ()); remove_use (use); } - use = next_use; + // The phi handling above might have removed multiple uses of THIS_SET. + if (this_set->has_any_uses ()) + worklist.safe_push (this_set); } - while (use); - gcc_assert (!set->has_any_uses ()); } // Update the REG_NOTES of INSN, whose pattern has just been changed.
[gcc r16-1670] libstdc++: Test for %S precision for durations with integral representation.
https://gcc.gnu.org/g:1bac0fcd04662138f0a91057914a1be420cb92d5 commit r16-1670-g1bac0fcd04662138f0a91057914a1be420cb92d5 Author: Tomasz Kamiński Date: Tue Jun 24 13:49:26 2025 +0200 libstdc++: Test for %S precision for durations with integral representation. Existing tests are extended to cover cases where no precision is specified, or it is specified as zero. The precision value is ignored in all cases. libstdc++-v3/ChangeLog: * testsuite/std/time/format/precision.cc: New tests. Diff: --- .../testsuite/std/time/format/precision.cc | 104 - 1 file changed, 99 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/testsuite/std/time/format/precision.cc b/libstdc++-v3/testsuite/std/time/format/precision.cc index ccb2c77ce05e..aa266156c1ff 100644 --- a/libstdc++-v3/testsuite/std/time/format/precision.cc +++ b/libstdc++-v3/testsuite/std/time/format/precision.cc @@ -16,6 +16,10 @@ test_empty() std::basic_string res; const duration d(33.111222); + res = std::format(WIDEN("{:}"), d); + VERIFY( res == WIDEN("33.1112s") ); + res = std::format(WIDEN("{:.0}"), d); + VERIFY( res == WIDEN("33.1112s") ); res = std::format(WIDEN("{:.3}"), d); VERIFY( res == WIDEN("33.1112s") ); res = std::format(WIDEN("{:.6}"), d); @@ -25,6 +29,10 @@ test_empty() // Uses ostream operator<< const duration nd = d; + res = std::format(WIDEN("{:}"), nd); + VERIFY( res == WIDEN("3.31112e+10ns") ); + res = std::format(WIDEN("{:.0}"), nd); + VERIFY( res == WIDEN("3.31112e+10ns") ); res = std::format(WIDEN("{:.3}"), nd); VERIFY( res == WIDEN("3.31112e+10ns") ); res = std::format(WIDEN("{:.6}"), nd); @@ -40,6 +48,10 @@ test_Q() std::basic_string res; const duration d(7.111222); + res = std::format(WIDEN("{:%Q}"), d); + VERIFY( res == WIDEN("7.111222") ); + res = std::format(WIDEN("{:.0%Q}"), d); + VERIFY( res == WIDEN("7.111222") ); res = std::format(WIDEN("{:.3%Q}"), d); VERIFY( res == WIDEN("7.111222") ); res = std::format(WIDEN("{:.6%Q}"), d); @@ -47,7 +59,23 @@ test_Q() res = 
std::format(WIDEN("{:.9%Q}"), d); VERIFY( res == WIDEN("7.111222") ); + duration md = d; + res = std::format(WIDEN("{:%Q}"), md); + VERIFY( res == WIDEN("7111.222") ); + res = std::format(WIDEN("{:.0%Q}"), md); + VERIFY( res == WIDEN("7111.222") ); + res = std::format(WIDEN("{:.3%Q}"), md); + VERIFY( res == WIDEN("7111.222") ); + res = std::format(WIDEN("{:.6%Q}"), md); + VERIFY( res == WIDEN("7111.222") ); + res = std::format(WIDEN("{:.9%Q}"), md); + VERIFY( res == WIDEN("7111.222") ); + const duration nd = d; + res = std::format(WIDEN("{:%Q}"), nd); + VERIFY( res == WIDEN("7111222000") ); + res = std::format(WIDEN("{:.0%Q}"), nd); + VERIFY( res == WIDEN("7111222000") ); res = std::format(WIDEN("{:.3%Q}"), nd); VERIFY( res == WIDEN("7111222000") ); res = std::format(WIDEN("{:.6%Q}"), nd); @@ -58,12 +86,16 @@ test_Q() template void -test_S() +test_S_fp() { std::basic_string res; // Precision is ignored, but period affects output - const duration d(5.111222); + duration d(5.111222); + res = std::format(WIDEN("{:%S}"), d); + VERIFY( res == WIDEN("05") ); + res = std::format(WIDEN("{:.0%S}"), d); + VERIFY( res == WIDEN("05") ); res = std::format(WIDEN("{:.3%S}"), d); VERIFY( res == WIDEN("05") ); res = std::format(WIDEN("{:.6%S}"), d); @@ -71,7 +103,11 @@ test_S() res = std::format(WIDEN("{:.9%S}"), d); VERIFY( res == WIDEN("05") ); - const duration md = d; + duration md = d; + res = std::format(WIDEN("{:%S}"), md); + VERIFY( res == WIDEN("05.111") ); + res = std::format(WIDEN("{:.0%S}"), md); + VERIFY( res == WIDEN("05.111") ); res = std::format(WIDEN("{:.3%S}"), md); VERIFY( res == WIDEN("05.111") ); res = std::format(WIDEN("{:.6%S}"), md); @@ -79,13 +115,70 @@ test_S() res = std::format(WIDEN("{:.9%S}"), md); VERIFY( res == WIDEN("05.111") ); - const duration nd = d; + duration ud = d; + res = std::format(WIDEN("{:%S}"), ud); + VERIFY( res == WIDEN("05.111222") ); + res = std::format(WIDEN("{:.0%S}"), ud); + VERIFY( res == WIDEN("05.111222") ); + res = 
std::format(WIDEN("{:.3%S}"), ud); + VERIFY( res == WIDEN("05.111222") ); + res = std::format(WIDEN("{:.6%S}"), ud); + VERIFY( res == WIDEN("05.111222") ); + res = std::format(WIDEN("{:.9%S}"), ud); + VERIFY( res == WIDEN("05.111222") ); + + duration nd = d; + res = std::format(WIDEN("{:%S}"), nd); + VERIFY( res == WIDEN("05.111222000") ); + res = std::format(WIDEN("{:.0%S}"), nd); + VERIFY( res == WIDEN("05.111222000") ); res = std::format(WIDEN("{:.3%S}"), nd); VERIFY( res == WIDEN("05.111222000") ); res = std::format(WIDEN("{:.6%S}"), nd); VERIFY( res == WIDEN("05.111222000") ); res = std::format(WIDEN("{:.9%S}"), nd); VERIFY( res == WIDEN("05.111222000") ); + + duration pd = d; + res = std::format(WIDEN("{:%S}"), pd); + VERIFY( res == WIDEN("05.111
[gcc r16-1674] get_bitmask is sometimes less refined.
https://gcc.gnu.org/g:3b84d180fa840a447c4fd08d655ea0c2d94abd0b commit r16-1674-g3b84d180fa840a447c4fd08d655ea0c2d94abd0b Author: Andrew MacLeod Date: Tue Jun 24 13:10:56 2025 -0400 get_bitmask is sometimes less refined. get_bitmask intersects the current mask with a mask generated from the range. If the 2 masks are incompatible, it currently returns UNKNOWN. Instead, it should return the original mask, otherwise information is lost. * value-range.cc (irange::get_bitmask): Return original mask if result is unknown. (assert_snap_result): New. (test_irange_snap_bounds): New. (range_tests_misc): Call test_irange_snap_bounds. Diff: --- gcc/value-range.cc | 117 - 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/gcc/value-range.cc b/gcc/value-range.cc index 23a5c66ed5e3..85c1e26287e9 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -2513,7 +2513,13 @@ irange::get_bitmask () const // See also the note in irange_bitmask::intersect. irange_bitmask bm (type (), lower_bound (), upper_bound ()); if (!m_bitmask.unknown_p ()) -bm.intersect (m_bitmask); +{ + bm.intersect (m_bitmask); + // If the new intersection is unknown, it means there are inconstent + // bits, so simply return the original bitmask. + if (bm.unknown_p ()) + return m_bitmask; +} return bm; } @@ -2879,6 +2885,112 @@ range_tests_strict_enum () ASSERT_FALSE (ir1.varying_p ()); } +// Test that range bounds are "snapped" to where they are expected to be. 
+ +static void +assert_snap_result (int lb_val, int ub_val, + int expected_lb, int expected_ub, + unsigned mask_val, unsigned value_val, + tree type) +{ + wide_int lb = wi::shwi (lb_val, TYPE_PRECISION (type)); + wide_int ub = wi::shwi (ub_val, TYPE_PRECISION (type)); + wide_int new_lb, new_ub; + + irange_bitmask bm (wi::uhwi (value_val, TYPE_PRECISION (type)), +wi::uhwi (mask_val, TYPE_PRECISION (type))); + + int_range_max r (type); + r.set (type, lb, ub); + r.update_bitmask (bm); + + if (TYPE_SIGN (type) == SIGNED && expected_ub < expected_lb) +gcc_checking_assert (r.undefined_p ()); + else if (TYPE_SIGN (type) == UNSIGNED + && ((unsigned)expected_ub < (unsigned)expected_lb)) +gcc_checking_assert (r.undefined_p ()); + else +{ + gcc_checking_assert (wi::eq_p (r.lower_bound (), +wi::shwi (expected_lb, + TYPE_PRECISION (type; + gcc_checking_assert (wi::eq_p (r.upper_bound (), +wi::shwi (expected_ub, + TYPE_PRECISION (type; +} +} + + +// Run a selection of tests that confirm, bounds are snapped as expected. +// We only test individual pairs, multiple pairs use the same snapping +// routine as single pairs. + +static void +test_irange_snap_bounds () +{ + tree u32 = unsigned_type_node; + tree s32 = integer_type_node; + tree s8 = build_nonstandard_integer_type (8, /*unsigned=*/ 0); + tree s1 = build_nonstandard_integer_type (1, /*unsigned=*/ 0); + tree u1 = build_nonstandard_integer_type (1, /*unsigned=*/ 1); + + // Basic aligned range: even-only + assert_snap_result (5, 15, 6, 14, 0xFFFE, 0x0, u32); + // Singleton that doesn't match mask: undefined. + assert_snap_result (7, 7, 1, 0, 0xFFFE, 0x0, u32); + // 8-bit signed char, mask 0xF0 (i.e. step of 16). + assert_snap_result (-100, 100, -96, 96, 0xF0, 0x00, s8); + // Already aligned range: no change. + assert_snap_result (0, 240, 0, 240, 0xF0, 0x00, u32); + // Negative range, step 16 alignment (s32). 
+ assert_snap_result (-123, -17, -112, -32, 0xFFF0, 0x00, s32); + // Negative range, step 16 alignment (trailing-zero aligned mask). + assert_snap_result (-123, -17, -112, -32, 0xFFF0, 0x00, s32); + // s8, 16-alignment mask, value = 0 (valid). + assert_snap_result (-50, 10, -48, 0, 0xF0, 0x00, s8); + // No values in range [-3,2] match alignment except 0. + assert_snap_result (-3, 2, 0, 0, 0xF8, 0x00, s8); + // No values in range [-3,2] match alignment — undefined. + assert_snap_result (-3, 2, 1, 0, 0xF8, 0x04, s8); + // Already aligned range: no change. + assert_snap_result (0, 240, 0, 240, 0xF0, 0x00, s32); + // 1-bit signed: only -1 allowed (0b1). + assert_snap_result (-1, 0, -1, -1, 0x00, 0x01, s1); + // 1-bit signed: only 0 allowed (0b0). + assert_snap_result (-1, 0, 0, 0, 0x00, 0x00, s1); + // 1-bit signed: no match (invalid case). + assert_snap_result (-1, -1, 1, 0, 0x00, 0x00, s1); + // 1-bit signed: no match (invalid case). + assert_snap_result (0, 0, 1, 0, 0x00, 0x01, s1); + // 1-bit unsigned: only 1 allowed. + assert_snap_result (0, 1, 1, 1, 0x00, 0x01, u1); + // 1-bit unsigned: only 0 allowed. + assert
[gcc r16-1672] tree-optimization/120808 - SLP build with mixed .FMA/.FMS
https://gcc.gnu.org/g:143e50f2ed56da6c9fab9c6344465832b641ca21 commit r16-1672-g143e50f2ed56da6c9fab9c6344465832b641ca21 Author: Richard Biener Date: Wed Jun 25 09:24:41 2025 +0200 tree-optimization/120808 - SLP build with mixed .FMA/.FMS The following allows SLP build to succeed when mixing .FMA/.FMS in different lanes like we handle mixed plus/minus. This does not yet address SLP pattern matching not being able to form a FMADDSUB from this. PR tree-optimization/120808 * tree-vectorizer.h (compatible_calls_p): Add flag to indicate a FMA/FMS pair is allowed. * tree-vect-slp.cc (compatible_calls_p): Likewise. (vect_build_slp_tree_1): Allow mixed .FMA/.FMS as two-operator. (vect_build_slp_tree_2): Handle calls in two-operator SLP build. * tree-vect-slp-patterns.cc (compatible_complex_nodes_p): Adjust. * gcc.dg/vect/bb-slp-pr120808.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c | 12 +++ gcc/tree-vect-slp-patterns.cc | 2 +- gcc/tree-vect-slp.cc| 52 - gcc/tree-vectorizer.h | 2 +- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c new file mode 100644 index ..c334d6ad8d39 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffp-contract=on" } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +void f(double x[restrict], double *y, double *z) +{ +x[0] = x[0] * y[0] + z[0]; +x[1] = x[1] * y[1] - z[1]; +} + +/* The following should check for SLP build covering the loads. 
*/ +/* { dg-final { scan-tree-dump "transform load" "slp2" { target { x86_64-*-* i?86-*-* } } } } */ diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc index c0dff90d9baf..24ae203e6ffe 100644 --- a/gcc/tree-vect-slp-patterns.cc +++ b/gcc/tree-vect-slp-patterns.cc @@ -786,7 +786,7 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache, if (is_gimple_call (a_stmt)) { if (!compatible_calls_p (dyn_cast (a_stmt), -dyn_cast (b_stmt))) +dyn_cast (b_stmt), false)) return false; } else if (!is_gimple_assign (a_stmt)) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index dc89da3bf177..603dfc0d4b2d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -986,13 +986,18 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, to be combined into the same SLP group. */ bool -compatible_calls_p (gcall *call1, gcall *call2) +compatible_calls_p (gcall *call1, gcall *call2, bool allow_two_operators) { unsigned int nargs = gimple_call_num_args (call1); if (nargs != gimple_call_num_args (call2)) return false; - if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2)) + auto cfn1 = gimple_call_combined_fn (call1); + auto cfn2 = gimple_call_combined_fn (call2); + if (cfn1 != cfn2 + && (!allow_two_operators + || !((cfn1 == CFN_FMA || cfn1 == CFN_FMS) + && (cfn2 == CFN_FMA || cfn2 == CFN_FMS return false; if (gimple_call_internal_p (call1)) @@ -1354,10 +1359,14 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, || rhs_code != IMAGPART_EXPR) /* Handle mismatches in plus/minus by computing both and merging the results. 
*/ - && !((first_stmt_code == PLUS_EXPR -|| first_stmt_code == MINUS_EXPR) - && (alt_stmt_code == PLUS_EXPR - || alt_stmt_code == MINUS_EXPR) + && !first_stmt_code == PLUS_EXPR + || first_stmt_code == MINUS_EXPR) + && (alt_stmt_code == PLUS_EXPR + || alt_stmt_code == MINUS_EXPR)) +|| ((first_stmt_code == CFN_FMA + || first_stmt_code == CFN_FMS) +&& (alt_stmt_code == CFN_FMA +|| alt_stmt_code == CFN_FMS))) && rhs_code == alt_stmt_code) && !(first_stmt_code.is_tree_code () && rhs_code.is_tree_code () @@ -1406,7 +1415,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, { if (!is_a (stmts[0]->stmt) || !compatible_calls_p (as_a (stmts[0]->stmt), - call_stmt)) + call_stmt, true)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_locat
[gcc r15-9860] aarch64: Incorrect removal of ZA restore [PR120624]
https://gcc.gnu.org/g:cb3c5b7d15cdb9373d102e7045c0823526a9c660 commit r15-9860-gcb3c5b7d15cdb9373d102e7045c0823526a9c660 Author: Richard Sandiford Date: Wed Jun 25 17:28:42 2025 +0100 aarch64: Incorrect removal of ZA restore [PR120624] The PCS defines a lazy save scheme for managing ZA across normal "private-ZA" functions. GCC currently uses this scheme for calls to all private-ZA functions (rather than using caller-save). Therefore, before a sequence of calls to private-ZA functions, GCC emits code to set up a lazy save. After the sequence of calls, GCC emits code to check whether lazy save was committed and restore the ZA contents if so. These sequences are emitted by the mode-switching pass, in an attempt to reduce the number of redundant saves and restores. The lazy save scheme also means that, before a function can use ZA, it must first conditionally store the old contents of ZA to the caller's lazy save buffer, if any. This all creates some relatively complex dependencies between setup code, save/restore code, and normal reads from and writes to ZA. These dependencies are modelled using special fake hard registers: ;; Sometimes we use placeholder instructions to mark where later ;; ABI-related lowering is needed. These placeholders read and ;; write this register. Instructions that depend on the lowering ;; read the register. (LOWERING_REGNUM 87) ;; Represents the contents of the current function's TPIDR2 block, ;; in abstract form. (TPIDR2_BLOCK_REGNUM 88) ;; Holds the value that the current function wants PSTATE.ZA to be. ;; The actual value can sometimes vary, because it does not track ;; changes to PSTATE.ZA that happen during a lazy save and restore. ;; Those effects are instead tracked by ZA_SAVED_REGNUM. (SME_STATE_REGNUM 89) ;; Instructions write to this register if they set TPIDR2_EL0 to a ;; well-defined value. Instructions read from the register if they ;; depend on the result of such writes. 
;; ;; The register does not model the architected TPIDR2_EL0, just the ;; current function's management of it. (TPIDR2_SETUP_REGNUM 90) ;; Represents the property "has an incoming lazy save been committed?". (ZA_FREE_REGNUM 91) ;; Represents the property "are the current function's ZA contents ;; stored in the lazy save buffer, rather than in ZA itself?". (ZA_SAVED_REGNUM 92) ;; Represents the contents of the current function's ZA state in ;; abstract form. At various times in the function, these contents ;; might be stored in ZA itself, or in the function's lazy save buffer. ;; ;; The contents persist even when the architected ZA is off. Private-ZA ;; functions have no effect on its contents. (ZA_REGNUM 93) Every normal read from ZA and write to ZA depends on SME_STATE_REGNUM, in order to sequence the code with the initial setup of ZA and with the lazy save scheme. The code to restore ZA after a call involves several instructions, including conditional control flow. It is initially represented as a single define_insn and is split late, after shrink-wrapping and prologue/epilogue insertion. The split form of the restore instruction includes a conditional call to __arm_tpidr2_restore: (define_insn "aarch64_tpidr2_restore" [(set (reg:DI ZA_SAVED_REGNUM) (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE)) (set (reg:DI SME_STATE_REGNUM) (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE)) ... ) The write to SME_STATE_REGNUM indicates the end of the region where ZA_REGNUM might differ from the real contents of ZA. In other words, it is the point at which normal reads from ZA and writes to ZA can safely take place. To finally get to the point, the problem in this PR was that the unsplit aarch64_restore_za pattern was missing this change to SME_STATE_REGNUM. It could therefore be deleted as dead before it had a chance to be split. The split form had the correct dataflow, but the unsplit form didn't. 
Unfortunately, the tests for this code tended to use calls and asms to model regions of ZA usage, and those don't seem to be affected in the same way. gcc/ PR target/120624 * config/aarch64/aarch64.md (SME_STATE_REGNUM): Expand on comments. * config/aarch64/aarch64-sme.md (aarch64_restore_za): Also set SME_STATE_REGNUM gcc/testsuite/ PR target/120624 * gcc.target/aarch64/sme/za_state_7.c: New test. (cherry picked from commit 8546265e2ee386ea8a4b2f91
[gcc r16-1668] libstdc++: Report compilation error on formatting "%d" from month_last [PR120650]
https://gcc.gnu.org/g:190f077fe5f318e168a7a1e1aa57058f377e commit r16-1668-g190f077fe5f318e168a7a1e1aa57058f377e Author: Tomasz Kamiński Date: Tue Jun 24 09:17:12 2025 +0200 libstdc++: Report compilation error on formatting "%d" from month_last [PR120650] For month_day we incorrectly reported day information to be available, which led to format_error being thrown from the call to formatter::format at runtime, instead of making the call to format ill-formed. The included tests cover most of the combinations of _ChronoParts and format specifiers. PR libstdc++/120650 libstdc++-v3/ChangeLog: * include/bits/chrono_io.h (formatter::parse): Call _M_parse with only Month being available. * testsuite/std/time/format/data_not_present_neg.cc: New test. Diff: --- libstdc++-v3/include/bits/chrono_io.h | 3 +- .../std/time/format/data_not_present_neg.cc| 164 + 2 files changed, 165 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/chrono_io.h b/libstdc++-v3/include/bits/chrono_io.h index abbf4efcc3bf..4eb00f4932db 100644 --- a/libstdc++-v3/include/bits/chrono_io.h +++ b/libstdc++-v3/include/bits/chrono_io.h @@ -2199,8 +2199,7 @@ namespace __format constexpr typename basic_format_parse_context<_CharT>::iterator parse(basic_format_parse_context<_CharT>& __pc) { - return _M_f._M_parse(__pc, __format::_Month|__format::_Day, -__defSpec); + return _M_f._M_parse(__pc, __format::_Month, __defSpec); } template diff --git a/libstdc++-v3/testsuite/std/time/format/data_not_present_neg.cc b/libstdc++-v3/testsuite/std/time/format/data_not_present_neg.cc new file mode 100644 index ..bb09451dc29c --- /dev/null +++ b/libstdc++-v3/testsuite/std/time/format/data_not_present_neg.cc @@ -0,0 +1,164 @@ +// { dg-do compile { target c++20 } } + +#include +#include + +using namespace std::chrono; + +auto d1 = std::format("{:%w}", 10d); // { dg-error "call to consteval function" } +auto d2 = std::format("{:%m}", 10d); // { dg-error "call to consteval function" } +auto d3 = 
std::format("{:%y}", 10d); // { dg-error "call to consteval function" } +auto d4 = std::format("{:%F}", 10d); // { dg-error "call to consteval function" } +auto d5 = std::format("{:%T}", 10d); // { dg-error "call to consteval function" } +auto d6 = std::format("{:%Q}", 10d); // { dg-error "call to consteval function" } +auto d7 = std::format("{:%Z}", 10d); // { dg-error "call to consteval function" } + +auto w1 = std::format("{:%d}", Thursday); // { dg-error "call to consteval function" } +auto w2 = std::format("{:%m}", Thursday); // { dg-error "call to consteval function" } +auto w3 = std::format("{:%y}", Thursday); // { dg-error "call to consteval function" } +auto w4 = std::format("{:%F}", Thursday); // { dg-error "call to consteval function" } +auto w5 = std::format("{:%T}", Thursday); // { dg-error "call to consteval function" } +auto w6 = std::format("{:%Q}", Thursday); // { dg-error "call to consteval function" } +auto w7 = std::format("{:%Z}", Thursday); // { dg-error "call to consteval function" } + +auto wi1 = std::format("{:%d}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi2 = std::format("{:%m}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi3 = std::format("{:%y}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi4 = std::format("{:%F}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi5 = std::format("{:%T}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi6 = std::format("{:%Q}", Thursday[2]); // { dg-error "call to consteval function" } +auto wi7 = std::format("{:%Z}", Thursday[2]); // { dg-error "call to consteval function" } + +auto wl1 = std::format("{:%d}", Thursday[last]); // { dg-error "call to consteval function" } +auto wl2 = std::format("{:%m}", Thursday[last]); // { dg-error "call to consteval function" } +auto wl3 = std::format("{:%y}", Thursday[last]); // { dg-error "call to consteval function" } +auto wl4 = std::format("{:%F}", 
Thursday[last]); // { dg-error "call to consteval function" } +auto wl5 = std::format("{:%T}", Thursday[last]); // { dg-error "call to consteval function" } +auto wl6 = std::format("{:%Q}", Thursday[last]); // { dg-error "call to consteval function" } +auto wl7 = std::format("{:%Z}", Thursday[last]); // { dg-error "call to consteval function" } + +auto m1 = std::format("{:%d}", January); // { dg-error "call to consteval function" } +auto m2 = std::format("{:%w}", January); // { dg-error "call to consteval function" } +auto m3 = std::format("{:%y}", January); // { dg-error "call to consteval function" } +auto m4 = std::format("{:%F}", January); // { dg-error "call to consteval function" } +auto m5 = std::format("{:%T}",
[gcc r16-1671] ivopts: Change constant_multiple_of to expand aff nodes.
https://gcc.gnu.org/g:e7ff8e8d77df7407e075f1c0cede5c97cda5eba7 commit r16-1671-ge7ff8e8d77df7407e075f1c0cede5c97cda5eba7 Author: Alfie Richards Date: Tue Jun 24 13:49:27 2025 + ivopts: Change constant_multiple_of to expand aff nodes. This changes the calls to tree_to_aff_combination in constant_multiple_of to tree_to_aff_combination_expand along with associated plumbing of ivopts_data and required cache. This improves cases such as: ```c void f(int *p1, int *p2, unsigned long step, unsigned long end, svbool_t pg) { for (unsigned long i = 0; i < end; i += step) { svst1(pg, p1, svld1_s32(pg, p2)); p1 += step; p2 += step; } } ``` Where ivopts previously didn't expand the SSA variables for the step increments and so lacked the ability to group all the IVs and ended up with: ``` f: cbz x3, .L1 mov x4, 0 .L3: ld1w z31.s, p0/z, [x1] add x4, x4, x2 st1w z31.s, p0, [x0] add x1, x1, x2, lsl 2 add x0, x0, x2, lsl 2 cmp x3, x4 bhi .L3 .L1: ret ``` After this change we end up with: ``` f: cbz x3, .L1 mov x4, 0 .L3: ld1w z31.s, p0/z, [x1, x4, lsl 2] st1w z31.s, p0, [x0, x4, lsl 2] add x4, x4, x2 cmp x3, x4 bhi .L3 .L1: ret ``` gcc/ChangeLog: * tree-ssa-loop-ivopts.cc (constant_multiple_of): Change tree_to_aff_combination to tree_to_aff_combination_expand and add parameter to take ivopts_data. (get_computation_aff_1): Change parameters and calls to include ivopts_data. (get_computation_aff): Ditto. (get_computation_at): Ditto. (get_debug_computation_at): Ditto. (get_computation_cost): Ditto. (rewrite_use_nonlinear_expr): Ditto. (rewrite_use_address): Ditto. (rewrite_use_compare): Ditto. (remove_unused_ivs): Ditto. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/adr_7.c: New test. 
Diff: --- gcc/testsuite/gcc.target/aarch64/sve/adr_7.c | 24 ++ gcc/tree-ssa-loop-ivopts.cc | 65 +++- 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_7.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_7.c new file mode 100644 index ..be9f1eb24c21 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_7.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +#include + +void f(int *p1, int *p2, unsigned long step, unsigned long end, svbool_t pg) { +for (unsigned long i = 0; i < end; i += step) { +svst1(pg, p1, svld1_s32(pg, p2)); +p1 += step; +p2 += step; +} +} + +// Checking that the induction variables are combined into a single variable, +// which is used for all addressing. +// (ie, theres only one scalar add, rather than 3, and the loads and stores use the +// more complex addressing modes) + +/* { dg-final { scan-assembler-not {\tld1w\tz[0-9]+\.d, p[0-9]+/z\[x[0-9]+\.d\]} } } */ +/* { dg-final { scan-assembler-not {\tst1w\tz[0-9]+\.d, p[0-9]+/z\[x[0-9]+\.d\]} } } */ + +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 1 } } */ diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 8a6726f19889..544a946ff890 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -2117,11 +2117,15 @@ idx_record_use (tree base, tree *idx, signedness of TOP and BOT. 
*/ static bool -constant_multiple_of (tree top, tree bot, widest_int *mul) +constant_multiple_of (tree top, tree bot, widest_int *mul, + struct ivopts_data *data) { aff_tree aff_top, aff_bot; - tree_to_aff_combination (top, TREE_TYPE (top), &aff_top); - tree_to_aff_combination (bot, TREE_TYPE (bot), &aff_bot); + tree_to_aff_combination_expand (top, TREE_TYPE (top), &aff_top, + &data->name_expansion_cache); + tree_to_aff_combination_expand (bot, TREE_TYPE (bot), &aff_bot, + &data->name_expansion_cache); + poly_widest_int poly_mul; if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul) && poly_mul.is_constant (mul)) @@ -3945,13 +3949,14 @@ determine_common_wider_type (tree *a, tree *b) } /* Determines the expression by that USE is expressed from induction variable -
[gcc r16-1673] tree-optimization/109892 - SLP reduction of fma
https://gcc.gnu.org/g:5aca8510abea6c3fac3336a7445863db07fd4a06 commit r16-1673-g5aca8510abea6c3fac3336a7445863db07fd4a06 Author: Richard Biener Date: Wed Jun 25 10:36:59 2025 +0200 tree-optimization/109892 - SLP reduction of fma The following adds the ability to vectorize a fma reduction pair as SLP reduction (we cannot yet handle ternary association in reduction vectorization). PR tree-optimization/109892 * tree-vect-loop.cc (check_reduction_path): Handle fma. (vectorizable_reduction): Apply FOLD_LEFT_REDUCTION code generation constraints. * gcc.dg/vect/vect-reduc-fma-1.c: New testcase. * gcc.dg/vect/vect-reduc-fma-2.c: Likewise. * gcc.dg/vect/vect-reduc-fma-3.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c | 15 +++ gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c | 20 gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c | 16 gcc/tree-vect-loop.cc| 17 + 4 files changed, 68 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c new file mode 100644 index ..e958b43e23b6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +double f(double x[], long n) +{ +double r0 = 0, r1 = 0; +for (; n; x += 2, n--) { +r0 = __builtin_fma(x[0], x[0], r0); +r1 = __builtin_fma(x[1], x[1], r1); +} +return r0 + r1; +} + +/* We should vectorize this as SLP reduction. 
*/ +/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c new file mode 100644 index ..ea1ca9720e5a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffp-contract=on" } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +static double muladd(double x, double y, double z) +{ +return x * y + z; +} +double g(double x[], long n) +{ +double r0 = 0, r1 = 0; +for (; n; x += 2, n--) { +r0 = muladd(x[0], x[0], r0); +r1 = muladd(x[1], x[1], r1); +} +return r0 + r1; +} + +/* We should vectorize this as SLP reduction. */ +/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c new file mode 100644 index ..10cecedd8e5f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-fma-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffast-math" } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +double f(double x[], long n) +{ +double r0 = 0, r1 = 0; +for (; n; x += 2, n--) { +r0 = __builtin_fma(x[0], x[0], r0); +r1 = __builtin_fma(x[1], x[1], r1); +} +return r0 + r1; +} + +/* We should vectorize this as SLP reduction, higher VF possible. */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" { target { x86_64-*-* i?86-*-* } } } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 9ee8e50ee75a..5b6769af31c3 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4126,6 +4126,10 @@ pop: if (op.ops[2] == op.ops[opi]) neg = ! 
neg; } + /* For an FMA the reduction code is the PLUS if the addition chain +is the reduction. */ + else if (op.code == IFN_FMA && opi == 2) + op.code = PLUS_EXPR; if (CONVERT_EXPR_CODE_P (op.code) && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) ; @@ -8070,6 +8074,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "in-order reduction chain without SLP.\n"); return false; } + /* Code generation doesn't support function calls other +than .COND_*. */ + if (!op.code.is_tree_code () + && !(op.code.is_internal_fn () + && conditional_internal_fn_code (internal_fn (op.code)) + != ERROR_MARK)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +"in-order reduction chain operation not " +"supported.\n"); + return false; + } STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type = FOLD_LEFT_REDUCTION; }
[gcc r16-1678] Mark pass_sccopy gate and execute functions as final override
https://gcc.gnu.org/g:db5cda85e636e0f65b57d807cac214d38356 commit r16-1678-gdb5cda85e636e0f65b57d807cac214d38356 Author: Martin Jambor Date: Wed Jun 25 16:53:03 2025 +0200 Mark pass_sccopy gate and execute functions as final override It is customary to mark the gate and execute functions of the classes representing passes as final override but this is missing in pass_sccopy. This patch adds it which also silences clang warnings about it. gcc/ChangeLog: 2025-06-24 Martin Jambor * gimple-ssa-sccopy.cc (class pass_sccopy): Mark member functions gate and execute as final override. Diff: --- gcc/gimple-ssa-sccopy.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/gimple-ssa-sccopy.cc b/gcc/gimple-ssa-sccopy.cc index c93374572a9e..341bae46080b 100644 --- a/gcc/gimple-ssa-sccopy.cc +++ b/gcc/gimple-ssa-sccopy.cc @@ -699,8 +699,8 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) { return true; } - virtual unsigned int execute (function *); + virtual bool gate (function *) final override { return true; } + virtual unsigned int execute (function *) final override; opt_pass * clone () final override { return new pass_sccopy (m_ctxt); } }; // class pass_sccopy
[gcc r16-1679] coroutines: Remove unused private member in cp_coroutine_transform
https://gcc.gnu.org/g:8b4b0f7af730ccda59e344d36c5be2eb5855bfd6 commit r16-1679-g8b4b0f7af730ccda59e344d36c5be2eb5855bfd6 Author: Martin Jambor Date: Wed Jun 25 16:56:58 2025 +0200 coroutines: Remove unused private member in cp_coroutine_transform When building GCC with clang, it warns that the private member orig_fn_body in class cp_coroutine_transform (defined in gcc/cp/coroutines.h) is not used, which indeed appears to be the case. This patch therefore removes it. gcc/cp/ChangeLog: 2025-06-24 Martin Jambor * coroutines.h (class cp_coroutine_transform): Remove member orig_fn_body. Diff: --- gcc/cp/coroutines.h | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/cp/coroutines.h b/gcc/cp/coroutines.h index 919dc9ab06b6..fcc464579157 100644 --- a/gcc/cp/coroutines.h +++ b/gcc/cp/coroutines.h @@ -100,7 +100,6 @@ public: private: tree orig_fn_decl;/* The original function decl. */ - tree orig_fn_body = NULL_TREE; /* The original function body. */ location_t fn_start = UNKNOWN_LOCATION; location_t fn_end = UNKNOWN_LOCATION; tree resumer = error_mark_node;
[gcc r16-1681] tree-ssa-propagate.h: Mark two functions as override
https://gcc.gnu.org/g:2670d11b029eca33774acb28ad7c87c3d14c9643 commit r16-1681-g2670d11b029eca33774acb28ad7c87c3d14c9643 Author: Martin Jambor Date: Wed Jun 25 17:02:10 2025 +0200 tree-ssa-propagate.h: Mark two functions as override When tree-ssa-propagate.h is compiled with clang, it complains that member functions value_of_expr and range_of_expr of class substitute_and_fold_engine are not marked as override, even though they do override virtual functions of the ancestor class. This patch merely adds the keyword to silence the warning and for consistency's sake. I did not make this part of the previous patch because I wanted to point out that the first case is quite unusual, a virtual function with a functional body (range_query::value_of_expr) is being overridden with a pure virtual function. I assume it was a conscious decision but adding the override keyword seems even more important then. gcc/ChangeLog: 2025-06-24 Martin Jambor * tree-ssa-propagate.h (class substitute_and_fold_engine): Mark member functions value_of_expr and range_of_expr as override. 
Diff: --- gcc/tree-ssa-propagate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h index 8429e38f40e4..200fc7320796 100644 --- a/gcc/tree-ssa-propagate.h +++ b/gcc/tree-ssa-propagate.h @@ -102,10 +102,10 @@ class substitute_and_fold_engine : public range_query substitute_and_fold_engine (bool fold_all_stmts = false) : fold_all_stmts (fold_all_stmts) { } - virtual tree value_of_expr (tree expr, gimple * = NULL) = 0; + virtual tree value_of_expr (tree expr, gimple * = NULL) override = 0; virtual tree value_on_edge (edge, tree expr) override; virtual tree value_of_stmt (gimple *, tree name = NULL) override; - virtual bool range_of_expr (vrange &r, tree expr, gimple * = NULL); + virtual bool range_of_expr (vrange &r, tree expr, gimple * = NULL) override; virtual ~substitute_and_fold_engine (void) { } virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
[gcc r16-1693] x86: Handle REG_EH_REGION note in DEF_INSN
https://gcc.gnu.org/g:62a80185db84f20f3efb05c81598bffa95bcd63d commit r16-1693-g62a80185db84f20f3efb05c81598bffa95bcd63d Author: H.J. Lu Date: Wed Jun 25 12:50:53 2025 +0800 x86: Handle REG_EH_REGION note in DEF_INSN For tcpsock_test.go in libgo tests, commit aba3b9d3a48a0703fd565f7c5f0caf604f59970b Author: H.J. Lu Date: Fri May 9 07:17:07 2025 +0800 x86: Extend the remove_redundant_vector pass added an instruction: (insn 501 101 102 21 (set (reg:V2DI 234) (vec_duplicate:V2DI (reg:DI 111 [ _46 ]))) "tcpsock_test.go":691:12 discrim 1 -1 (nil)) after (insn 101 100 501 21 (set (reg:DI 111 [ _46 ]) (mem:DI (reg/f:DI 110 [ _45 ]) [5 *_45+0 S8 A64])) "tcpsock_test.go":691:12 discrim 1 99 {*movdi_internal} (expr_list:REG_DEAD (reg/f:DI 110 [ _45 ]) (expr_list:REG_EH_REGION (const_int 1 [0x1]) (nil which resulted in (insn 101 100 501 21 (set (reg:DI 111 [ _46 ]) (mem:DI (reg/f:DI 110 [ _45 ]) [5 *_45+0 S8 A64])) "tcpsock_test.go":691:12 discrim 1 99 {*movdi_internal} (expr_list:REG_DEAD (reg/f:DI 110 [ _45 ]) (expr_list:REG_EH_REGION (const_int 1 [0x1]) (nil (insn 501 101 102 21 (set (reg:V2DI 234) (vec_duplicate:V2DI (reg:DI 111 [ _46 ]))) "tcpsock_test.go":691:12 discrim 1 -1 (nil)) and caused: tcpsock_test.go: In function 'net.TestTCPBig..func2': tcpsock_test.go:684:28: error: in basic block 21: 684 | go func() { |^ tcpsock_test.go:684:28: error: flow control insn inside a basic block (insn 101 100 501 21 (set (reg:DI 111 [ _46 ]) (mem:DI (reg/f:DI 110 [ _45 ]) [5 *_45+0 S8 A64])) "tcpsock_test.go":691:12 discrim 1 99 {*movdi_internal} (expr_list:REG_DEAD (reg/f:DI 110 [ _45 ]) (expr_list:REG_EH_REGION (const_int 1 [0x1]) (nil during RTL pass: rrvl tcpsock_test.go:684:28: internal compiler error: in rtl_verify_bb_insns, at cfgrtl.cc:2834 Copy the REG_EH_REGION note to the newly added instruction and split the block after the previous instruction. PR target/120816 * config/i386/i386-features.cc (remove_redundant_vector_load): Handle REG_EH_REGION note in DEF_INSN. 
Signed-off-by: H.J. Lu Diff: --- gcc/config/i386/i386-features.cc | 32 1 file changed, 32 insertions(+) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index be2ce3103dde..d942bf08b56f 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3820,6 +3820,8 @@ remove_redundant_vector_load (void) if (replaced) { + auto_vec control_flow_insns; + /* (Re-)discover loops so that bb->loop_father can be used in the analysis below. */ calculate_dominance_info (CDI_DOMINATORS); @@ -3835,6 +3837,20 @@ remove_redundant_vector_load (void) rtx set = gen_rtx_SET (load->broadcast_reg, load->broadcast_source); insn = emit_insn_after (set, load->def_insn); + + if (cfun->can_throw_non_call_exceptions) + { + /* Handle REG_EH_REGION note in DEF_INSN. */ + rtx note = find_reg_note (load->def_insn, + REG_EH_REGION, nullptr); + if (note) + { + control_flow_insns.safe_push (load->def_insn); + add_reg_note (insn, REG_EH_REGION, + XEXP (note, 0)); + } + } + if (dump_file) { fprintf (dump_file, "\nAdd:\n\n"); @@ -3855,6 +3871,22 @@ remove_redundant_vector_load (void) loop_optimizer_finalize (); + if (!control_flow_insns.is_empty ()) + { + free_dominance_info (CDI_DOMINATORS); + + FOR_EACH_VEC_ELT (control_flow_insns, i, insn) + if (control_flow_insn_p (insn)) + { + /* Split the block after insn. There will be a fallthru + edge, which is OK so we keep it. We have to create + the exception edges ourselves. */ + bb = BLOCK_FOR_INSN (insn); + split_block (bb, insn); + rtl_make_eh_edge (NULL, bb, BB_END (bb)); + } + } + df_process_deferred_rescans (); }
[gcc r16-1694] x86: Also handle all 1s float vector constant
https://gcc.gnu.org/g:77473a27bae04da99d6979d43e7bd0a8106f4557 commit r16-1694-g77473a27bae04da99d6979d43e7bd0a8106f4557 Author: H.J. Lu Date: Thu Jun 26 06:08:51 2025 +0800 x86: Also handle all 1s float vector constant Since float vector constant (const_vector:V4SF [(const_double:SF -QNaN [-QNaN]) repeated x4]) is an all 1s float vector constant, update the remove_redundant_vector pass to replace (insn 20 18 21 2 (set (reg:V4SF 124) (const_vector:V4SF [ (const_double:SF -QNaN [-QNaN]) repeated x4 ])) "x.cc":26:5 2426 {movv4sf_internal} (nil)) with (insn 49 2 5 2 (set (reg:V16QI 135) (const_vector:V16QI [ (const_int -1 [0x]) repeated x16 ])) -1 (nil)) ... (insn 20 18 21 2 (set (reg:V4SF 124) (subreg:V4SF (reg:V16QI 135) 0)) "x.cc":26:5 2426 {movv4sf_internal} (nil)) gcc/ PR target/120819 * config/i386/i386-features.cc (ix86_broadcast_inner): Also handle all 1s float vector constant. gcc/testsuite/ PR target/120819 * g++.target/i386/pr120819.C: New test. Signed-off-by: H.J. Lu Diff: --- gcc/config/i386/i386-features.cc | 6 -- gcc/testsuite/g++.target/i386/pr120819.C | 37 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index d942bf08b56f..fb4a9ec9903b 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3532,8 +3532,10 @@ ix86_broadcast_inner (rtx op, machine_mode mode, *insn_p = nullptr; return const0_rtx; } - else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT - && (op == constm1_rtx || op == CONSTM1_RTX (mode))) + else if ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && (op == constm1_rtx || op == CONSTM1_RTX (mode))) + || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT + && float_vector_all_ones_operand (op, mode))) { *scalar_mode_p = QImode; *kind_p = X86_CSE_CONSTM1_VECTOR; diff --git a/gcc/testsuite/g++.target/i386/pr120819.C b/gcc/testsuite/g++.target/i386/pr120819.C new file mode 100644 index ..d0b05dfd0525 --- /dev/null +++ 
b/gcc/testsuite/g++.target/i386/pr120819.C @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -march=znver2 -std=gnu++17 -w" } */ + +typedef float a __attribute__ ((__vector_size__ (16))); +typedef long long b __attribute__ ((__vector_size__ (16))); +int c; +a d, e, f; +b g, h; +struct i +{ + i (b j) : k (j) {} + i (); + b k; +}; +i +l (int j) +{ + g = (b)(__attribute__ ((__vector_size__ (4 * sizeof (1 int){ j, j, j, + j }; + return g; +} +extern int m (); +void +n () +{ + h = (__attribute__ (( + __vector_size__ (2 * sizeof (long long long long){ c }; + i o, p = l (2147483647 * 2 + 1); + f = __builtin_ia32_blendvps (a (p.k), d, e); + if (m ()) +{ + i q = l (2147483647 * 2 + 1); + a r = __builtin_ia32_blendvps (a (q.k), d, e); + o = b (r); + i s; +} +}
[gcc r16-1692] x86: Add preserve_none and update no_caller_saved_registers attributes
https://gcc.gnu.org/g:9804b23198b39f85a7258be556c5e8aed44b9efc commit r16-1692-g9804b23198b39f85a7258be556c5e8aed44b9efc Author: H.J. Lu Date: Sun Apr 13 11:38:24 2025 -0700 x86: Add preserve_none and update no_caller_saved_registers attributes Add preserve_none attribute which is similar to no_callee_saved_registers attribute, except on x86-64, r12, r13, r14, r15, rdi and rsi registers are used for integer parameter passing. This can be used in an interpreter to avoid saving/restoring the registers in functions which process byte codes. It improved the pystones benchmark by 6-7%: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119628#c15 Remove -mgeneral-regs-only restriction on no_caller_saved_registers attribute. Only SSE is allowed since SSE XMM register load preserves the upper bits in YMM/ZMM register while YMM register load zeros the upper 256 bits of ZMM register, and preserving 32 ZMM registers can be quite expensive. gcc/ PR target/119628 * config/i386/i386-expand.cc (ix86_expand_call): Call ix86_type_no_callee_saved_registers_p instead of looking up no_callee_saved_registers attribute. * config/i386/i386-options.cc (ix86_set_func_type): Look up preserve_none attribute. Check preserve_none attribute for interrupt attribute. Don't check no_caller_saved_registers nor no_callee_saved_registers conflicts here. (ix86_set_func_type): Check no_callee_saved_registers before checking no_caller_saved_registers attribute. (ix86_set_current_function): Allow SSE with no_caller_saved_registers attribute. (ix86_handle_call_saved_registers_attribute): Check preserve_none, no_callee_saved_registers and no_caller_saved_registers conflicts. (ix86_gnu_attributes): Add preserve_none attribute. * config/i386/i386-protos.h (ix86_type_no_callee_saved_registers_p): New. * config/i386/i386.cc (x86_64_preserve_none_int_parameter_registers): New. (ix86_using_red_zone): Don't use red-zone when there are no caller-saved registers with SSE. (ix86_type_no_callee_saved_registers_p): New. 
(ix86_function_ok_for_sibcall): Also check TYPE_PRESERVE_NONE and call ix86_type_no_callee_saved_registers_p instead of looking up no_callee_saved_registers attribute. (ix86_comp_type_attributes): Call ix86_type_no_callee_saved_registers_p instead of looking up no_callee_saved_registers attribute. Return 0 if preserve_none attribute doesn't match in 64-bit mode. (ix86_function_arg_regno_p): For cfun with TYPE_PRESERVE_NONE, use x86_64_preserve_none_int_parameter_registers. (init_cumulative_args): Set preserve_none_abi. (function_arg_64): Use x86_64_preserve_none_int_parameter_registers with preserve_none attribute. (setup_incoming_varargs_64): Use x86_64_preserve_none_int_parameter_registers with preserve_none attribute. (ix86_save_reg): Treat TYPE_PRESERVE_NONE like TYPE_NO_CALLEE_SAVED_REGISTERS. (ix86_nsaved_sseregs): Allow saving XMM registers for no_caller_saved_registers attribute. (ix86_compute_frame_layout): Likewise. (x86_this_parameter): Use x86_64_preserve_none_int_parameter_registers with preserve_none attribute. * config/i386/i386.h (ix86_args): Add preserve_none_abi. (call_saved_registers_type): Add TYPE_PRESERVE_NONE. (machine_function): Change call_saved_registers to 3 bits. * doc/extend.texi: Add preserve_none attribute. Update no_caller_saved_registers attribute to remove -mgeneral-regs-only restriction. gcc/testsuite/ PR target/119628 * gcc.target/i386/no-callee-saved-3.c: Adjust error location. * gcc.target/i386/no-callee-saved-19a.c: New test. * gcc.target/i386/no-callee-saved-19b.c: Likewise. * gcc.target/i386/no-callee-saved-19c.c: Likewise. * gcc.target/i386/no-callee-saved-19d.c: Likewise. * gcc.target/i386/no-callee-saved-19e.c: Likewise. * gcc.target/i386/preserve-none-1.c: Likewise. * gcc.target/i386/preserve-none-2.c: Likewise. * gcc.target/i386/preserve-none-3.c: Likewise. * gcc.target/i386/preserve-none-4.c: Likewise. * gcc.target/i386/preserve-none-5.c: Likewise. * gcc.target/i386/preserve-none-6.c: Likewise. 
* gcc.target/i386/preserve-none-7.c: Likewise. * gcc.target/i386/preserve-none-8.c: Likewise. * gcc.target/i386/preserve
[gcc/aoliva/heads/testme] (627 commits) [testsuite] restore default action from dfp.exp [PR120631]
The branch 'aoliva/heads/testme' was updated to point to: b7cc7ea036b6... [testsuite] restore default action from dfp.exp [PR120631] It previously pointed to: efab6fca... [lra] catch all to-sp eliminations Diff: Summary of changes (added commits): --- b7cc7ea... [testsuite] restore default action from dfp.exp [PR120631] 407ae3a... Daily bump. (*) b8b08a8... x86: Add debug dump for the remove_redundant_vector pass (*) 5fd8f33... arc: Use intrinsics for __builtin_mul_overflow () (*) 49943c2... arc: Add commutative multiplication patterns (*) 913d8cb... arc: testsuite: Scan rlc instead of mov.hs (*) b9361e5... ARC: Use intrinsics for __builtin_sub_overflow*() (*) 467fa6d... ARC: Use intrinsics for __builtin_add_overflow*() (*) 6af1e74... diagnostics: Mark path_label::get_effects as final override (*) a73cb70... ranger-op: Use CFN_ constant instead of plain BUILTIN_ one (*) 00712ae... value-relation.h: Mark dom_oracle::next_relation as overrid (*) 2670d11... tree-ssa-propagate.h: Mark two functions as override (*) 85f0620... ranger: Mark several member functions as final override (*) 8b4b0f7... coroutines: Remove unused private member in cp_coroutine_tr (*) db5cda8... Mark pass_sccopy gate and execute functions as final overri (*) fb1ba48... Mark rtl_avoid_store_forwarding functions final override (*) c08d6d9... Remove unused vector in value-relation.cc. (*) ec44df7... Promote verify_range to vrange. (*) 3b84d18... get_bitmask is sometimes less refined. (*) 5aca851... tree-optimization/109892 - SLP reduction of fma (*) 143e50f... tree-optimization/120808 - SLP build with mixed .FMA/.FMS (*) e7ff8e8... ivopts: Change constant_multiple_of to expand aff nodes. (*) 1bac0fc... libstdc++: Test for %S precision for durations with integra (*) 76f7f91... rtl-ssa: Rewrite process_uses_of_deleted_def [PR120745] (*) 190... libstdc++: Report compilation error on formatting "%d" from (*) 7fd6cb3... x86: Update -mtune=intel for Diamond Rapids/Clearwater Fore (*) 0c701c7... 
i386: Remove CLDEMOTE for clients (*) e858dc7... RISC-V: Add Profiles RVA/B23S64 support. (*) aaf55e0... Add -fauto-profile-inlining (*) 3fde750... Remove early inlining from afdo pass (*) 3924740... Daily bump. (*) 750bc28... gcn: Fix glc vs. sc0 handling for scalar memory access (*) 1e35a51... Fortran/OpenACC: Add Fortran support for acc_attach/acc_det (*) 92e1893... RISC-V: Add patterns for vector-scalar multiply-(subtract-) (*) 5bc9271... Fortran: fix ICE in verify_gimple_in_seq with substrings [P (*) ed7fc2b... c++: Implement C++26 P3618R0 - Allow attaching main to the (*) 8f5fac5... i386: Convert LEA stack adjust insn to SUB when FLAGS_REG i (*) 63076db... Remove non-SLP path from vectorizable_load (*) 3f19867... diagnostic: fix for older version of GCC (*) bc8f542... libstdc++: Unnecessary type completion in __is_complete_or_ (*) 0606d2b... gcc: remove atan from edom_only_function (*) bd9cac1... s390: Fix float vector extract for pre-z13 (*) f60d3f5... AArch64: promote aarch64-autovec-peference to mautovec-pref (*) 8e80287... AArch64: propose -mmax-vectorization as an option to overri (*) 3f88230... fortran: Mention user variable in SELECT TYPE temporary var (*) c06979f... Don't duplicate setup code cost when do group-candidate cos (*) 7f87bfa... middle-end: Apply loop->unroll directly in vectorizer (*) 309dbce... middle-end: replace log_vf usages with vf to allow support (*) aba3b9d... x86: Extend the remove_redundant_vector pass (*) d073bb6... x86: Update memcpy/memset inline strategies for -mtune=gene (*) 0235b6d... Copy discriminators when inlining (*) c24eb5e... Fix AFDO zero profile handling (*) 4b739c0... Fix shrink wrap separate ICE for mingw [PR120741] (*) bf7162b... [RISC-V][PR target/118241] Fix data prefetch predicate/cons (*) c4f5308... Daily bump. (*) cdd6785... Fixup dropping REG_EQUAL note in ext-dce (*) d0142e1... libgdiagnostics: sarif-replay: add extra sinks via -fdiagno (*) e6406ae... analyzer: fix missing "final override" (*) 2b07725... 
OpenACC: Add 'if' clause to 'acc wait' directive (*) 6dd1659... Fortran: fix checking of renamed-on-use interface name [PR1 (*) 4e9104a... contrib: handle GDB's 'unexpected core files' count (*) 2334d30... diagnostics: add state diagrams to analyzer experimental-ht (*) 5a64c96... diagnostics: handle pp_token::kind::event_id in experimenta (*) 11811e6... RISC-V: Add test for vec_duplicate + vsaddu.vv combine case (*) 9a8f82d... RISC-V: Add test for vec_duplicate + vsaddu.vv combine case (*) a2d018b... RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on G (*) 35a26f2... tailc: Allow musttail tail calls with -fsanitize=address [P (*) b9523a9... expand: Allow musttail tail calls with -fsanitize=address [ (*) 6deab18... vect: Use combined peeling and versioning for mutually alig (*) 7e4d55f... match: Simplify
[gcc/aoliva/heads/testbase] (626 commits) Daily bump.
The branch 'aoliva/heads/testbase' was updated to point to: 407ae3aa7901... Daily bump. It previously pointed to: 2edb50a31089... [lra] force reg update after spilling to memory [PR120424] Diff: Summary of changes (added commits): --- 407ae3a... Daily bump. (*) b8b08a8... x86: Add debug dump for the remove_redundant_vector pass (*) 5fd8f33... arc: Use intrinsics for __builtin_mul_overflow () (*) 49943c2... arc: Add commutative multiplication patterns (*) 913d8cb... arc: testsuite: Scan rlc instead of mov.hs (*) b9361e5... ARC: Use intrinsics for __builtin_sub_overflow*() (*) 467fa6d... ARC: Use intrinsics for __builtin_add_overflow*() (*) 6af1e74... diagnostics: Mark path_label::get_effects as final override (*) a73cb70... ranger-op: Use CFN_ constant instead of plain BUILTIN_ one (*) 00712ae... value-relation.h: Mark dom_oracle::next_relation as overrid (*) 2670d11... tree-ssa-propagate.h: Mark two functions as override (*) 85f0620... ranger: Mark several member functions as final override (*) 8b4b0f7... coroutines: Remove unused private member in cp_coroutine_tr (*) db5cda8... Mark pass_sccopy gate and execute functions as final overri (*) fb1ba48... Mark rtl_avoid_store_forwarding functions final override (*) c08d6d9... Remove unused vector in value-relation.cc. (*) ec44df7... Promote verify_range to vrange. (*) 3b84d18... get_bitmask is sometimes less refined. (*) 5aca851... tree-optimization/109892 - SLP reduction of fma (*) 143e50f... tree-optimization/120808 - SLP build with mixed .FMA/.FMS (*) e7ff8e8... ivopts: Change constant_multiple_of to expand aff nodes. (*) 1bac0fc... libstdc++: Test for %S precision for durations with integra (*) 76f7f91... rtl-ssa: Rewrite process_uses_of_deleted_def [PR120745] (*) 190... libstdc++: Report compilation error on formatting "%d" from (*) 7fd6cb3... x86: Update -mtune=intel for Diamond Rapids/Clearwater Fore (*) 0c701c7... i386: Remove CLDEMOTE for clients (*) e858dc7... RISC-V: Add Profiles RVA/B23S64 support. 
(*) aaf55e0... Add -fauto-profile-inlining (*) 3fde750... Remove early inlining from afdo pass (*) 3924740... Daily bump. (*) 750bc28... gcn: Fix glc vs. sc0 handling for scalar memory access (*) 1e35a51... Fortran/OpenACC: Add Fortran support for acc_attach/acc_det (*) 92e1893... RISC-V: Add patterns for vector-scalar multiply-(subtract-) (*) 5bc9271... Fortran: fix ICE in verify_gimple_in_seq with substrings [P (*) ed7fc2b... c++: Implement C++26 P3618R0 - Allow attaching main to the (*) 8f5fac5... i386: Convert LEA stack adjust insn to SUB when FLAGS_REG i (*) 63076db... Remove non-SLP path from vectorizable_load (*) 3f19867... diagnostic: fix for older version of GCC (*) bc8f542... libstdc++: Unnecessary type completion in __is_complete_or_ (*) 0606d2b... gcc: remove atan from edom_only_function (*) bd9cac1... s390: Fix float vector extract for pre-z13 (*) f60d3f5... AArch64: promote aarch64-autovec-peference to mautovec-pref (*) 8e80287... AArch64: propose -mmax-vectorization as an option to overri (*) 3f88230... fortran: Mention user variable in SELECT TYPE temporary var (*) c06979f... Don't duplicate setup code cost when do group-candidate cos (*) 7f87bfa... middle-end: Apply loop->unroll directly in vectorizer (*) 309dbce... middle-end: replace log_vf usages with vf to allow support (*) aba3b9d... x86: Extend the remove_redundant_vector pass (*) d073bb6... x86: Update memcpy/memset inline strategies for -mtune=gene (*) 0235b6d... Copy discriminators when inlining (*) c24eb5e... Fix AFDO zero profile handling (*) 4b739c0... Fix shrink wrap separate ICE for mingw [PR120741] (*) bf7162b... [RISC-V][PR target/118241] Fix data prefetch predicate/cons (*) c4f5308... Daily bump. (*) cdd6785... Fixup dropping REG_EQUAL note in ext-dce (*) d0142e1... libgdiagnostics: sarif-replay: add extra sinks via -fdiagno (*) e6406ae... analyzer: fix missing "final override" (*) 2b07725... OpenACC: Add 'if' clause to 'acc wait' directive (*) 6dd1659... 
Fortran: fix checking of renamed-on-use interface name [PR1 (*) 4e9104a... contrib: handle GDB's 'unexpected core files' count (*) 2334d30... diagnostics: add state diagrams to analyzer experimental-ht (*) 5a64c96... diagnostics: handle pp_token::kind::event_id in experimenta (*) 11811e6... RISC-V: Add test for vec_duplicate + vsaddu.vv combine case (*) 9a8f82d... RISC-V: Add test for vec_duplicate + vsaddu.vv combine case (*) a2d018b... RISC-V: Combine vec_duplicate + vsaddu.vv to vsaddu.vx on G (*) 35a26f2... tailc: Allow musttail tail calls with -fsanitize=address [P (*) b9523a9... expand: Allow musttail tail calls with -fsanitize=address [ (*) 6deab18... vect: Use combined peeling and versioning for mutually alig (*) 7e4d55f... match: Simplify doubled not, negate and conjugate operators (*) 97044a4... tree-optimization/120729 - limi
[gcc(refs/users/aoliva/heads/testme)] [testsuite] restore default action from dfp.exp [PR120631]
https://gcc.gnu.org/g:b7cc7ea036b6d89a06a5b9134415e8598cc2ae3c commit b7cc7ea036b6d89a06a5b9134415e8598cc2ae3c Author: Alexandre Oliva Date: Thu Jun 26 00:36:45 2025 -0300 [testsuite] restore default action from dfp.exp [PR120631] dfp.exp tests for dfprt before deciding whether to default to run or compile, and the PR120631 tests override that without checking for dfprt. Rework them to avoid attempting to link and run programs when dfp runtime support isn't available. for gcc/testsuite/ChangeLog PR middle-end/120631 * pr120631.c: Drop overrider of dg-do default action. * bitint-9.c: Likewise. * bitint-10.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/dfp/bitint-10.c | 2 +- gcc/testsuite/gcc.dg/dfp/bitint-9.c | 2 +- gcc/testsuite/gcc.dg/dfp/pr120631.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/dfp/bitint-10.c b/gcc/testsuite/gcc.dg/dfp/bitint-10.c index b48f0ea6c277..4a73aebe095c 100644 --- a/gcc/testsuite/gcc.dg/dfp/bitint-10.c +++ b/gcc/testsuite/gcc.dg/dfp/bitint-10.c @@ -1,5 +1,5 @@ /* PR middle-end/120631 */ -/* { dg-do run { target bitint } } */ +/* { dg-require-effective-target bitint } */ /* { dg-options "-O2" } */ #if __BITINT_MAXWIDTH__ >= 128 diff --git a/gcc/testsuite/gcc.dg/dfp/bitint-9.c b/gcc/testsuite/gcc.dg/dfp/bitint-9.c index 72155a012475..31614876a12f 100644 --- a/gcc/testsuite/gcc.dg/dfp/bitint-9.c +++ b/gcc/testsuite/gcc.dg/dfp/bitint-9.c @@ -1,5 +1,5 @@ /* PR middle-end/120631 */ -/* { dg-do run { target bitint } } */ +/* { dg-require-effective-target bitint } */ /* { dg-options "-O2" } */ #if __BITINT_MAXWIDTH__ >= 2048 diff --git a/gcc/testsuite/gcc.dg/dfp/pr120631.c b/gcc/testsuite/gcc.dg/dfp/pr120631.c index 2085ff7ba5a7..2533e9de29f8 100644 --- a/gcc/testsuite/gcc.dg/dfp/pr120631.c +++ b/gcc/testsuite/gcc.dg/dfp/pr120631.c @@ -1,5 +1,4 @@ /* PR middle-end/120631 */ -/* { dg-do run } */ /* { dg-options "-O2" } */ _Decimal64 a = 123456789135790.0dd;
[gcc r15-9864] tree-optimization/120654 - ICE with range query from IVOPTs
https://gcc.gnu.org/g:d8a9467163addaf61297443b41f381420d5af7f5 commit r15-9864-gd8a9467163addaf61297443b41f381420d5af7f5 Author: Richard Biener Date: Fri Jun 20 11:14:38 2025 +0200 tree-optimization/120654 - ICE with range query from IVOPTs The following ICEs as we hand down an UNDEFINED range to where it isn't expected. Put the guard that's there earlier. PR tree-optimization/120654 * vr-values.cc (range_fits_type_p): Check for undefined_p () before accessing type (). * gcc.dg/torture/pr120654.c: New testcase. (cherry picked from commit 6bd1223bd55ed60fa5dbfd4a8444e133e5e933f5) Diff: --- gcc/testsuite/gcc.dg/torture/pr120654.c | 24 gcc/vr-values.cc| 10 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr120654.c b/gcc/testsuite/gcc.dg/torture/pr120654.c new file mode 100644 index ..3819b78281d0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr120654.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ + +int a, c, e, f, h, j; +long g, k; +void *malloc(long); +void free(void *); +int b(int m) { + if (m || a) +return 1; + return 0.0f; +} +int d(int m, int p2) { return b(m) + m + (1 + p2 + p2); } +int i() { + long l[] = {2, 9, 7, 8, g, g, 9, 0, 2, g}; + e = l[c] << 6; +} +void n() { + long o; + int *p = malloc(sizeof(int)); + k = 1 % j; + for (; i() + f + h; o++) +if (p[d(j + 6, (int)k + 1992695866) + h + f + j + (int)k - 1 + o]) + free(p); +} diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc index 4c787593b95a..5a1ab2b2f1b1 100644 --- a/gcc/vr-values.cc +++ b/gcc/vr-values.cc @@ -1023,6 +1023,10 @@ range_fits_type_p (const irange *vr, widest_int tem; signop src_sgn; + /* Now we can only handle ranges with constant bounds. */ + if (vr->undefined_p () || vr->varying_p ()) +return false; + /* We can only handle integral and pointer types. 
*/ src_type = vr->type (); if (!INTEGRAL_TYPE_P (src_type) @@ -1031,17 +1035,13 @@ range_fits_type_p (const irange *vr, /* An extension is fine unless VR is SIGNED and dest_sgn is UNSIGNED, and so is an identity transform. */ - src_precision = TYPE_PRECISION (vr->type ()); + src_precision = TYPE_PRECISION (src_type); src_sgn = TYPE_SIGN (src_type); if ((src_precision < dest_precision && !(dest_sgn == UNSIGNED && src_sgn == SIGNED)) || (src_precision == dest_precision && src_sgn == dest_sgn)) return true; - /* Now we can only handle ranges with constant bounds. */ - if (vr->undefined_p () || vr->varying_p ()) -return false; - wide_int vrmin = vr->lower_bound (); wide_int vrmax = vr->upper_bound ();
[gcc r15-9865] tree-optimization/120729 - limit compile time in uninit_analysis::prune_phi_opnds
https://gcc.gnu.org/g:59e5e863c7dc5e8a4164d36273c4c2b5f6cd602c commit r15-9865-g59e5e863c7dc5e8a4164d36273c4c2b5f6cd602c Author: Richard Biener Date: Fri Jun 20 15:07:20 2025 +0200 tree-optimization/120729 - limit compile time in uninit_analysis::prune_phi_opnds The testcase in this PR shows, on the GCC 14 branch, that in some degenerate cases we can spend exponential time pruning always initialized paths through a web of PHIs. The following adds --param uninit-max-prune-work, defaulted to 10, to limit that to effectively O(1). PR tree-optimization/120729 * gimple-predicate-analysis.h (uninit_analysis::prune_phi_opnds): Add argument of work budget remaining. * gimple-predicate-analysis.cc (uninit_analysis::prune_phi_opnds): Likewise. Maintain and honor it throughout the recursion. * params.opt (uninit-max-prune-work): New. * doc/invoke.texi (uninit-max-prune-work): Document. (cherry picked from commit 97044a47de533f2a9b3fc864e5ea318e53979079) Diff: --- gcc/doc/invoke.texi | 3 +++ gcc/gimple-predicate-analysis.cc | 12 +--- gcc/gimple-predicate-analysis.h | 2 +- gcc/params.opt | 4 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index baaa0c1aed5e..14750aed64db 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17308,6 +17308,9 @@ predicate chain. @item uninit-max-num-chains Maximum number of predicates ored in the normalized predicate chain. +@item uninit-max-prune-work +Maximum amount of work done to prune paths where the variable is always initialized. + @item sched-autopref-queue-depth Hardware autoprefetcher scheduler model control flag. 
Number of lookahead cycles the model looks into; at ' diff --git a/gcc/gimple-predicate-analysis.cc b/gcc/gimple-predicate-analysis.cc index 76f6ab613107..b056b42a17ec 100644 --- a/gcc/gimple-predicate-analysis.cc +++ b/gcc/gimple-predicate-analysis.cc @@ -385,7 +385,8 @@ bool uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, tree boundary_cst, tree_code cmp_code, hash_set *visited_phis, - bitmap *visited_flag_phis) + bitmap *visited_flag_phis, + unsigned &max_attempts) { /* The Boolean predicate guarding the PHI definition. Initialized lazily from PHI in the first call to is_use_guarded() and cached @@ -398,6 +399,10 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, if (!MASK_TEST_BIT (opnds, i)) continue; + if (max_attempts == 0) + return false; + --max_attempts; + tree flag_arg = gimple_phi_arg_def (flag_def, i); if (!is_gimple_constant (flag_arg)) { @@ -432,7 +437,7 @@ uninit_analysis::prune_phi_opnds (gphi *phi, unsigned opnds, gphi *flag_def, unsigned opnds_arg_phi = m_eval.phi_arg_set (phi_arg_def); if (!prune_phi_opnds (phi_arg_def, opnds_arg_phi, flag_arg_def, boundary_cst, cmp_code, visited_phis, - visited_flag_phis)) + visited_flag_phis, max_attempts)) return false; bitmap_clear_bit (*visited_flag_phis, SSA_NAME_VERSION (phi_result)); @@ -634,9 +639,10 @@ uninit_analysis::overlap (gphi *phi, unsigned opnds, hash_set *visited, value that is in conflict with the use guard/predicate. 
*/ bitmap visited_flag_phis = NULL; gphi *phi_def = as_a (flag_def); + unsigned max_attempts = param_uninit_max_prune_work; bool all_pruned = prune_phi_opnds (phi, opnds, phi_def, boundary_cst, cmp_code, visited, -&visited_flag_phis); +&visited_flag_phis, max_attempts); if (visited_flag_phis) BITMAP_FREE (visited_flag_phis); if (all_pruned) diff --git a/gcc/gimple-predicate-analysis.h b/gcc/gimple-predicate-analysis.h index f71061ec2836..67a19aa09052 100644 --- a/gcc/gimple-predicate-analysis.h +++ b/gcc/gimple-predicate-analysis.h @@ -152,7 +152,7 @@ private: bool is_use_guarded (gimple *, basic_block, gphi *, unsigned, hash_set *); bool prune_phi_opnds (gphi *, unsigned, gphi *, tree, tree_code, - hash_set *, bitmap *); + hash_set *, bitmap *, unsigned &); bool overlap (gphi *, unsigned, hash_set *, const predicate &); void collect_phi_def_edges (gphi *, basic_block, vec *, diff --git a/gcc/params.opt b/gcc/params.opt index a2b606fb9178..412b6701fadc 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1189,6 +1189,10 @@ predicate chain. Com
[gcc r16-1682] value-relation.h: Mark dom_oracle::next_relation as override
https://gcc.gnu.org/g:00712aebedcd79854d180b842a9300e8325fb184 commit r16-1682-g00712aebedcd79854d180b842a9300e8325fb184 Author: Martin Jambor Date: Wed Jun 25 17:03:39 2025 +0200 value-relation.h: Mark dom_oracle::next_relation as override When GCC is compiled with clang, it emits a warning that dom_oracle::next_relation is not marked as override even though it does override a virtual function of its ancestor. This patch marks it as such to silence the warning and for the sake of consistency. There are other member functions in the class which are marked as final override but this particular function is in the protected section so I decided to just mark it as override. gcc/ChangeLog: 2025-06-24 Martin Jambor * value-relation.h (class dom_oracle): Mark member function next_relation as override. Diff: --- gcc/value-relation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/value-relation.h b/gcc/value-relation.h index 1081877ccca7..87f0d856fabd 100644 --- a/gcc/value-relation.h +++ b/gcc/value-relation.h @@ -235,7 +235,7 @@ public: void dump (FILE *f) const final override; protected: virtual relation_chain *next_relation (basic_block, relation_chain *, -tree) const; +tree) const override; bool m_do_trans_p; bitmap m_tmp, m_tmp2; bitmap m_relation_set; // Index by ssa-name. True if a relation exists
[gcc r15-9859] rtl-ssa: Reject non-address uses of autoinc regs [PR120347]
https://gcc.gnu.org/g:2efe8cc55581a5fecb388646f1908eed4ec11a63 commit r15-9859-g2efe8cc55581a5fecb388646f1908eed4ec11a63 Author: Richard Sandiford Date: Wed Jun 25 17:28:42 2025 +0100 rtl-ssa: Reject non-address uses of autoinc regs [PR120347] As the rtl.texi documentation of RTX_AUTOINC expressions says: If a register used as the operand of these expressions is used in another address in an insn, the original value of the register is used. Uses of the register outside of an address are not permitted within the same insn as a use in an embedded side effect expression because such insns behave differently on different machines and hence must be treated as ambiguous and disallowed. late-combine was failing to follow this rule. One option would have been to enforce it during the substitution phase, like combine does. This could either be a dedicated condition in the substitution code or, more generally, an extra condition in can_merge_accesses. (The latter would include extending is_pre_post_modify to uses.) However, since the restriction applies to patterns rather than to actions on patterns, the more robust fix seemed to be to test and reject this case in (a subroutine of) rtl_ssa::recog. We already do something similar for hard-coded register clobbers. Using vec_rtx_properties isn't the lightest-weight operation out there. I did wonder about relying on the is_pre_post_modify flag of the definitions in the new_defs array, but that would require callers that create new autoincs to set the flag before calling recog. Normally these flags are instead updated automatically based on the final pattern. Besides, recog itself has had to traverse the whole pattern, and it is even less light-weight than vec_rtx_properties. At least the pattern should be in cache. The rtl-ssa fix showed up a mistake (of mine) in the rtl_properties walker: try_to_add_src would drop all flags except IN_NOTE before recursing into RTX_AUTOINC addresses. 
RTX_AUTOINCs only occur in addresses, and so for them, the flags coming into try_to_add_src are set by: unsigned int base_flags = flags & rtx_obj_flags::STICKY_FLAGS; ... if (MEM_P (x)) { ... unsigned int addr_flags = base_flags | rtx_obj_flags::IN_MEM_STORE; if (flags & rtx_obj_flags::IS_READ) addr_flags |= rtx_obj_flags::IN_MEM_LOAD; try_to_add_src (XEXP (x, 0), addr_flags); return; } This means that the only flags that can be set are: - IN_NOTE (the sole member of STICKY_FLAGS) - IN_MEM_STORE - IN_MEM_LOAD Thus dropping all flags except IN_NOTE had the effect of dropping IN_MEM_STORE and IN_MEM_LOAD, and nothing else. But those flags are the ones that mark something as being part of a mem address. The exclusion was therefore exactly wrong. gcc/ PR rtl-optimization/120347 * rtlanal.cc (rtx_properties::try_to_add_src): Don't drop the IN_MEM_LOAD and IN_MEM_STORE flags for autoinc registers. * rtl-ssa/changes.cc (recog_level2): Check whether an RTX_AUTOINCed register also appears outside of an address. gcc/testsuite/ PR rtl-optimization/120347 * gcc.dg/torture/pr120347.c: New test. (cherry picked from commit e322dff09d011f65f5cae4e95c3a24ccfae7b1e1) Diff: --- gcc/rtl-ssa/changes.cc | 18 ++ gcc/rtlanal.cc | 2 +- gcc/testsuite/gcc.dg/torture/pr120347.c | 13 + 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc index eb579ad3ad7e..f7aa6a66cdf5 100644 --- a/gcc/rtl-ssa/changes.cc +++ b/gcc/rtl-ssa/changes.cc @@ -1106,6 +1106,24 @@ recog_level2 (insn_change &change, add_regno_clobber_fn add_regno_clobber) } } + // Per rtl.texi, registers that are modified using RTX_AUTOINC operations + // cannot also appear outside an address. 
+ vec_rtx_properties properties; + properties.add_pattern (pat); + for (rtx_obj_reference def : properties.refs ()) +if (def.is_pre_post_modify ()) + for (rtx_obj_reference use : properties.refs ()) + if (def.regno == use.regno && !use.in_address ()) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "register %d is both auto-modified" +" and used outside an address:\n", def.regno); + print_rtl_single (dump_file, pat); + } + return false; + } + // check_asm_operands checks the constraints after RA, so we don't // need to do it again. if (reload_completed && !asm_p) diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index
[gcc r16-1675] Promote verify_range to vrange.
https://gcc.gnu.org/g:ec44df73232d07a66fea719c5aaddb6734321c4f commit r16-1675-gec44df73232d07a66fea719c5aaddb6734321c4f Author: Andrew MacLeod Date: Thu Jun 19 21:19:27 2025 -0400 Promote verify_range to vrange. Most range classes had a verify_range, but they were all private. Make it a supported routine from vrange. * value-range.cc (frange::verify_range): Constify. (irange::verify_range): Constify. * value-range.h (vrange::verify_range): New. (irange::verify_range): Make public. (prange::verify_range): Make public. (frange::verify_range): Make public. (value_range::verify_range): New. Diff: --- gcc/value-range.cc | 4 ++-- gcc/value-range.h | 9 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/gcc/value-range.cc b/gcc/value-range.cc index 85c1e26287e9..dc6909e77c54 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -1205,7 +1205,7 @@ frange::supports_type_p (const_tree type) const } void -frange::verify_range () +frange::verify_range () const { if (!undefined_p ()) gcc_checking_assert (HONOR_NANS (m_type) || !maybe_isnan ()); @@ -1515,7 +1515,7 @@ irange::set (tree min, tree max, value_range_kind kind) // Check the validity of the range. 
void -irange::verify_range () +irange::verify_range () const { gcc_checking_assert (m_discriminator == VR_IRANGE); if (m_kind == VR_UNDEFINED) diff --git a/gcc/value-range.h b/gcc/value-range.h index c32c5076b63a..5c358f3c70cd 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -111,6 +111,7 @@ public: bool operator== (const vrange &) const; bool operator!= (const vrange &r) const { return !(*this == r); } void dump (FILE *) const; + virtual void verify_range () const { } protected: vrange (enum value_range_discriminator d) : m_discriminator (d) { } ENUM_BITFIELD(value_range_kind) m_kind : 8; @@ -323,6 +324,7 @@ public: virtual void update_bitmask (const class irange_bitmask &) override; virtual irange_bitmask get_bitmask () const override; + virtual void verify_range () const; protected: void maybe_resize (int needed); virtual void set (tree, tree, value_range_kind = VR_RANGE) override; @@ -335,7 +337,6 @@ protected: void normalize_kind (); - void verify_range (); // Hard limit on max ranges allowed. 
static const int HARD_MAX_RANGES = 255; @@ -421,7 +422,7 @@ public: bool contains_p (const wide_int &) const; wide_int lower_bound () const; wide_int upper_bound () const; - void verify_range () const; + virtual void verify_range () const; irange_bitmask get_bitmask () const final override; void update_bitmask (const irange_bitmask &) final override; protected: @@ -593,14 +594,13 @@ public: bool nan_signbit_p (bool &signbit) const; bool known_isnormal () const; bool known_isdenormal_or_zero () const; - + virtual void verify_range () const; protected: virtual bool contains_p (tree cst) const override; virtual void set (tree, tree, value_range_kind = VR_RANGE) override; private: bool internal_singleton_p (REAL_VALUE_TYPE * = NULL) const; - void verify_range (); bool normalize_kind (); bool union_nans (const frange &); bool intersect_nans (const frange &); @@ -798,6 +798,7 @@ public: void update_bitmask (const class irange_bitmask &bm) { return m_vrange->update_bitmask (bm); } void accept (const vrange_visitor &v) const { m_vrange->accept (v); } + void verify_range () const { m_vrange->verify_range (); } private: void init (tree type); void init (const vrange &);
[gcc r16-1676] Remove unused vector in value-relation.cc.
https://gcc.gnu.org/g:c08d6d90ca5e6e8bdcef180de214971b844542b2 commit r16-1676-gc08d6d90ca5e6e8bdcef180de214971b844542b2 Author: Andrew MacLeod Date: Tue Jun 24 16:51:56 2025 -0400 Remove unused vector in value-relation.cc. The relation_to_code vector in value-relation is now unused, so we can remove it. * value-relation.cc (relation_to_code): Remove. Diff: --- gcc/value-relation.cc | 6 -- 1 file changed, 6 deletions(-) diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index c7ced445ad76..2ac7650fe5b4 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -202,12 +202,6 @@ adjust_equivalence_range (vrange &range) } } -// This vector maps a relation to the equivalent tree code. - -static const tree_code relation_to_code [VREL_LAST] = { - ERROR_MARK, ERROR_MARK, LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR, EQ_EXPR, - NE_EXPR }; - // Given an equivalence set EQUIV, set all the bits in B that are still valid // members of EQUIV in basic block BB.
[gcc r16-1680] ranger: Mark several member functions as final override
https://gcc.gnu.org/g:85f0620a8b0cde32fb12e27f602e13445281e670 commit r16-1680-g85f0620a8b0cde32fb12e27f602e13445281e670 Author: Martin Jambor Date: Wed Jun 25 16:59:12 2025 +0200 ranger: Mark several member functions as final override When GCC is built with clang, it emits warnings that several member functions of various ranger classes override a virtual function of an ancestor but are not marked with the override keyword. After inspecting the cases, I found that all these classes had other member functions marked as final override, so I added the final keyword everywhere too. In some cases other such overrides were not explicitly marked as virtual, which made formatting easier. For that reason and also for consistency, in such cases I removed the virtual keyword from the functions I marked as final override too. gcc/ChangeLog: 2025-06-24 Martin Jambor * range-op-mixed.h (class operator_plus): Mark member function overflow_free_p as final override. (class operator_minus): Likewise. (class operator_mult): Likewise. * range-op-ptr.cc (class pointer_plus_operator): Mark member function lhs_op1_relation as final override. * range-op.cc (class operator_div::): Mark member functions op2_range and update_bitmask as final override. (class operator_logical_and): Mark member functions fold_range, op1_range and op2_range as final override. Remove unnecessary virtual. (class operator_logical_or): Likewise. (class operator_logical_not): Mark member functions fold_range and op1_range as final override. Remove unnecessary virtual. formatting easier. (class operator_absu): Mark member functions wi_fold as final override. 
Diff: --- gcc/range-op-mixed.h | 12 - gcc/range-op-ptr.cc | 2 +- gcc/range-op.cc | 72 +++- 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/gcc/range-op-mixed.h b/gcc/range-op-mixed.h index f8f183069046..567b0cdd31bb 100644 --- a/gcc/range-op-mixed.h +++ b/gcc/range-op-mixed.h @@ -558,8 +558,8 @@ public: void update_bitmask (irange &r, const irange &lh, const irange &rh) const final override; - virtual bool overflow_free_p (const irange &lh, const irange &rh, - relation_trio = TRIO_VARYING) const; + bool overflow_free_p (const irange &lh, const irange &rh, + relation_trio = TRIO_VARYING) const final override; // Check compatibility of all operands. bool operand_check_p (tree t1, tree t2, tree t3) const final override { return range_compatible_p (t1, t2) && range_compatible_p (t1, t3); } @@ -634,8 +634,8 @@ public: void update_bitmask (irange &r, const irange &lh, const irange &rh) const final override; - virtual bool overflow_free_p (const irange &lh, const irange &rh, - relation_trio = TRIO_VARYING) const; + bool overflow_free_p (const irange &lh, const irange &rh, + relation_trio = TRIO_VARYING) const final override; // Check compatibility of all operands. bool operand_check_p (tree t1, tree t2, tree t3) const final override { return range_compatible_p (t1, t2) && range_compatible_p (t1, t3); } @@ -720,8 +720,8 @@ public: const REAL_VALUE_TYPE &lh_lb, const REAL_VALUE_TYPE &lh_ub, const REAL_VALUE_TYPE &rh_lb, const REAL_VALUE_TYPE &rh_ub, relation_kind kind) const final override; - virtual bool overflow_free_p (const irange &lh, const irange &rh, - relation_trio = TRIO_VARYING) const; + bool overflow_free_p (const irange &lh, const irange &rh, + relation_trio = TRIO_VARYING) const final override; // Check compatibility of all operands. 
bool operand_check_p (tree t1, tree t2, tree t3) const final override { return range_compatible_p (t1, t2) && range_compatible_p (t1, t3); } diff --git a/gcc/range-op-ptr.cc b/gcc/range-op-ptr.cc index 6aadc9cf2c95..e0e21ad1b2a3 100644 --- a/gcc/range-op-ptr.cc +++ b/gcc/range-op-ptr.cc @@ -315,7 +315,7 @@ public: virtual relation_kind lhs_op1_relation (const prange &lhs, const prange &op1, const irange &op2, - relation_kind) const; + relation_kind) const final override; void update_bitmask (prange &r, const prange &lh, const irange &rh) const { update_known_bitmask (r, POINTER_PLUS_EXPR, lh, rh); } } op_pointer_plus; diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 0a3f0b6b56c7..1f91066a44e7 100644 --- a/gcc/range-op.cc +++ b/gc
[gcc r15-9861] Fortran: Source allocation of pure function result rejected [PR119948]
https://gcc.gnu.org/g:8422524f6f43263caca2c2ab8a0e890e92f5f114 commit r15-9861-g8422524f6f43263caca2c2ab8a0e890e92f5f114 Author: Paul Thomas Date: Thu May 1 15:22:54 2025 +0100 Fortran: Source allocation of pure function result rejected [PR119948] 2025-05-07 Paul Thomas and Steven G. Kargl gcc/fortran PR fortran/119948 * resolve.cc (gfc_impure_variable): The result of a module procedure with an interface declaration is not impure even if the current namespace is not the same as the symbol's. * primary.cc (match_variable): Module procedures with sym the same as result can be treated as variables, although marked external. gcc/testsuite/ PR fortran/119948 * gfortran.dg/pr119948.f90: New test. (cherry picked from commit 0abc77da9d704bba55a376bb5c162a54826ab94a) Diff: --- gcc/fortran/primary.cc | 2 +- gcc/fortran/resolve.cc | 10 gcc/testsuite/gfortran.dg/pr119948.f90 | 83 ++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc index b5e5481b..f0e1fef6812e 100644 --- a/gcc/fortran/primary.cc +++ b/gcc/fortran/primary.cc @@ -4448,7 +4448,7 @@ match_variable (gfc_expr **result, int equiv_flag, int host_flag) case FL_PROCEDURE: /* Check for a nonrecursive function result variable. */ if (sym->attr.function - && !sym->attr.external + && (!sym->attr.external || sym->abr_modproc_decl) && sym->result == sym && (gfc_is_function_return_value (sym, gfc_current_ns) || (sym->attr.entry diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index c5c10205dadf..ee5b22a728d4 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -18478,6 +18478,16 @@ gfc_impure_variable (gfc_symbol *sym) if (sym->attr.use_assoc || sym->attr.in_common) return 1; + /* The namespace of a module procedure interface holds the arguments and + symbols, and so the symbol namespace can be different to that of the + procedure. 
*/ + if (sym->ns != gfc_current_ns + && gfc_current_ns->proc_name->abr_modproc_decl + && sym->ns->proc_name->attr.function + && sym->attr.result + && !strcmp (sym->ns->proc_name->name, gfc_current_ns->proc_name->name)) +return 0; + /* Check if the symbol's ns is inside the pure procedure. */ for (ns = gfc_current_ns; ns; ns = ns->parent) { diff --git a/gcc/testsuite/gfortran.dg/pr119948.f90 b/gcc/testsuite/gfortran.dg/pr119948.f90 new file mode 100644 index ..2e36fae5a9de --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr119948.f90 @@ -0,0 +1,83 @@ +! { dg-do run } +! +! Test the fix for PR119948, which used to fail as indicated below with: +! (1) "Error: Bad allocate-object at (1) for a PURE procedure" +! (2) "Error: ‘construct_test2 at (1) is not a variable" +! +! Contributed by Damian Rouson +! +module test_m + implicit none + + type test_t +integer, allocatable :: i + end type + + interface +pure module function construct_test(arg) result(test) + implicit none + type(test_t) :: test + type(test_t), intent(in) :: arg +end function + +pure module function construct_test2(arg) + implicit none + type(test_t) construct_test2 + type(test_t), intent(in) :: arg +end function + +pure module function construct_test_3(arg) result(test) + implicit none + type(test_t) :: test + type(test_t), intent(in) :: arg +end function + +pure module function construct_test_4(arg) + implicit none + type(test_t) :: construct_test_4 + type(test_t), intent(in) :: arg +end function + end interface + +contains + module procedure construct_test +allocate(test%i, source = arg%i) ! Fail #1 + end procedure + + module procedure construct_test2 +allocate(construct_test2%i, source = arg%i)! Fail #2 + end procedure +end module + +submodule (test_m)test_s +contains + module procedure construct_test_3 +allocate(test%i, source = arg%i) ! This was OK. + end procedure + + module procedure construct_test_4 +allocate(construct_test_4%i, source = arg%i) ! This was OK. 
+ end procedure +end submodule + + use test_m + type(test_t) :: res, dummy +! + dummy%i = int (rand () * 1e6) + res = construct_test (dummy) + if (res%i /= dummy%i) stop 1 +! + dummy%i = int (rand () * 1e6) + res = construct_test2 (dummy) + if (res%i /= dummy%i) stop 2 +! + dummy%i = int (rand () * 1e6) + res = construct_test_3 (dummy) + if (res%i /= dummy%i) stop 3 + + dummy%i = int (rand () * 1e6) + res = construct_test_4 (dummy) + if (res%i /= dummy%i) stop 4 + + deallocate (res%i, dummy%i) +end
[gcc r16-1688] arc: Add commutative multiplication patterns
https://gcc.gnu.org/g:49943c2c0adf6b66b62e66dafde08d1dbdcfc621 commit r16-1688-g49943c2c0adf6b66b62e66dafde08d1dbdcfc621 Author: Luis Silva Date: Wed Jun 25 17:54:12 2025 +0300 arc: Add commutative multiplication patterns This patch introduces two new instruction patterns: `*mulsi3_cmp0`: This pattern performs a multiplication and sets the CC_Z register based on the result, while also storing the result of the multiplication in a general-purpose register. `*mulsi3_cmp0_noout`: This pattern performs a multiplication and sets the CC_Z register based on the result without storing the result in a general-purpose register. These patterns are optimized to generate code using the `mpy.f` instruction, specifically used where the result is compared to zero. In addition, the previous commutative multiplication implementation was removed. It incorrectly took into account the negative flag, which is wrong. This new implementation only considers the zero flag. A test case has been added to verify the correctness of these changes. gcc/ChangeLog: * config/arc/arc.cc (arc_select_cc_mode): Handle multiplication results compared against zero, selecting CC_Zmode. * config/arc/arc.md (*mulsi3_cmp0): New define_insn. (*mulsi3_cmp0_noout): New define_insn. gcc/testsuite/ChangeLog: * gcc.target/arc/mult-cmp0.c: New test. Signed-off-by: Luis Silva Diff: --- gcc/config/arc/arc.cc| 7 gcc/config/arc/arc.md| 32 ++-- gcc/testsuite/gcc.target/arc/mult-cmp0.c | 66 3 files changed, 102 insertions(+), 3 deletions(-) diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 40308263ff33..bb5db977c800 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -1553,6 +1553,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) machine_mode mode = GET_MODE (x); rtx x1; + /* Matches all instructions which can do .f and clobbers only Z flag. 
*/ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && GET_CODE (x) == MULT + && (op == EQ || op == NE)) +return CC_Zmode; + /* For an operation that sets the condition codes as a side-effect, the C and V flags is not set as for cmp, so we can only use comparisons where this doesn't matter. (For LT and GE we can use "mi" and "pl" diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 2394eb8c001e..96921207cc41 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1068,11 +1068,37 @@ archs4x, archs4xd" (set_attr "cond" "set_zn") (set_attr "length" "*,4,4,4,8")]) -;; The next two patterns are for plos, ior, xor, and, and mult. +(define_insn "*mulsi3_cmp0" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z +(mult:SI + (match_operand:SI 1 "register_operand" "%r,0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,I,i")) +(const_int 0))) + (set (match_operand:SI 0 "register_operand""=r,r,r") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_MPY" + "mpy%?.f\\t%0,%1,%2" + [(set_attr "length" "4,4,8") + (set_attr "type" "multi")]) + +(define_insn "*mulsi3_cmp0_noout" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z +(mult:SI + (match_operand:SI 0 "register_operand" "%r,r,r") + (match_operand:SI 1 "nonmemory_operand" "rL,I,i")) +(const_int 0)))] + "TARGET_MPY" + "mpy%?.f\\t0,%0,%1" + [(set_attr "length" "4,4,8") + (set_attr "type" "multi")]) + +;; The next two patterns are for plus, ior, xor, and. 
(define_insn "*commutative_binary_cmp0_noout" [(set (match_operand 0 "cc_set_register" "") (match_operator 4 "zn_compare_operator" - [(match_operator:SI 3 "commutative_operator" + [(match_operator:SI 3 "commutative_operator_sans_mult" [(match_operand:SI 1 "register_operand" "%r,r") (match_operand:SI 2 "nonmemory_operand" "rL,Cal")]) (const_int 0)]))] @@ -1085,7 +,7 @@ archs4x, archs4xd" (define_insn "*commutative_binary_cmp0" [(set (match_operand 3 "cc_set_register" "") (match_operator 5 "zn_compare_operator" - [(match_operator:SI 4 "commutative_operator" + [(match_operator:SI 4 "commutative_operator_sans_mult" [(match_operand:SI 1 "register_operand" "%0, 0,r,r") (match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")]) (const_int 0)])) diff --git a/gcc/testsuite/gcc.target/arc/mult-cmp0.c b/gcc/testsuite/gcc.target/arc/mult-cmp0.c new file mode 100644 index ..680c72eaa6de --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/mult-cmp0.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +/* mpy.f r1,r0,r1 + mov_s r0,5;3 + j_s.d [blink] + mov.ne r0,r
[gcc r16-1685] ARC: Use intrinsics for __builtin_add_overflow*()
https://gcc.gnu.org/g:467fa6d257efdedf7b59168c759c97d3cb08c17d commit r16-1685-g467fa6d257efdedf7b59168c759c97d3cb08c17d Author: Shahab Vahedi Date: Wed Jun 25 17:22:45 2025 +0300 ARC: Use intrinsics for __builtin_add_overflow*() This patch covers signed and unsigned additions. The generated code would be something along these lines: signed: add.f r0, r1, r2 b.v @label unsigned: add.f r0, r1, r2 b.c @label gcc/ * config/arc/arc-modes.def (CC_V): New mode. * config/arc/arc-protos.h (arc_gen_unlikely_cbranch): New function declaration. * config/arc/arc.cc (arc_gen_unlikely_cbranch): New function. (get_arc_condition_code): Handle new mode. * config/arc/arc.md (addsi3_v, addvsi4, addsi3_c, uaddvsi4): New patterns. * config/arc/predicates.md (proper_comparison_operator): Handle the new V_mode. (equality_comparison_operator): Likewise. gcc/testsuite/ * gcc.target/arc/overflow-1.c: New file Diff: --- gcc/config/arc/arc-modes.def | 1 + gcc/config/arc/arc-protos.h | 1 + gcc/config/arc/arc.cc | 22 +++ gcc/config/arc/arc.md | 50 +++ gcc/config/arc/predicates.md | 9 ++- gcc/testsuite/gcc.target/arc/overflow-1.c | 100 ++ 6 files changed, 182 insertions(+), 1 deletion(-) diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def index cab46d7ce9c6..7c7dff9146a5 100644 --- a/gcc/config/arc/arc-modes.def +++ b/gcc/config/arc/arc-modes.def @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. 
If not see CC_MODE (CC_ZN); CC_MODE (CC_Z); +CC_MODE (CC_V); CC_MODE (CC_C); CC_MODE (CC_FP_GT); CC_MODE (CC_FP_GE); diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index cd82aa1ef5e5..2db643cea08c 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -55,6 +55,7 @@ extern bool arc_check_mov_const (HOST_WIDE_INT ); extern bool arc_split_mov_const (rtx *); extern bool arc_can_use_return_insn (void); extern bool arc_split_move_p (rtx *); +extern void arc_gen_unlikely_cbranch (enum rtx_code, machine_mode, rtx); #endif /* RTX_CODE */ diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 78ba814d223a..40308263ff33 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -1436,6 +1436,13 @@ get_arc_condition_code (rtx comparison) case GEU : return ARC_CC_NC; default : gcc_unreachable (); } +case E_CC_Vmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_NV; + case NE : return ARC_CC_V; + default : gcc_unreachable (); + } case E_CC_FP_GTmode: if (TARGET_ARGONAUT_SET && TARGET_SPFP) switch (GET_CODE (comparison)) @@ -11543,6 +11550,21 @@ arc_libm_function_max_error (unsigned cfn, machine_mode mode, return default_libm_function_max_error (cfn, mode, boundary_p); } +void +arc_gen_unlikely_cbranch (enum rtx_code cmp, machine_mode cc_mode, rtx label) +{ + rtx cc_reg, x; + + cc_reg = gen_rtx_REG (cc_mode, CC_REG); + label = gen_rtx_LABEL_REF (VOIDmode, label); + + x = gen_rtx_fmt_ee (cmp, VOIDmode, cc_reg, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label, pc_rtx); + + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + + #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 1344d9c68b02..c81a7f19d228 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2734,6 +2734,56 @@ archs4x, archs4xd" } [(set_attr "length" "8")]) +(define_insn "addsi3_v" + [(set 
(match_operand:SI 0 "register_operand" "=r,r,r, r") + (plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_V CC_REG) + (compare:CC_V (sign_extend:DI (plus:SI (match_dup 1) + (match_dup 2))) +(plus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2)] + "" + "add.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,4,8")]) + +(define_expand "addvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_addsi3_v (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE;") + +(define_insn "addsi3_c" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (plus:SI (match_operand:
[gcc r16-1648] fortran: Mention user variable in SELECT TYPE temporary variable names
https://gcc.gnu.org/g:3f88230da6a5fbdee1458ea6abc61ecb4b926cd0 commit r16-1648-g3f88230da6a5fbdee1458ea6abc61ecb4b926cd0 Author: Mikael Morin Date: Fri Jun 20 12:08:02 2025 +0200 fortran: Mention user variable in SELECT TYPE temporary variable names The temporary variables that are generated to implement SELECT TYPE and TYPE IS statements have (before this change) a name depending only on the type. This can produce confusing dumps with code having multiple SELECT TYPE statements, as it isn't obvious which SELECT TYPE construct the variable relates to. This is especially the case with nested SELECT TYPE statements and with SELECT TYPE variables having identical types (and thus identical names). This change adds one additional user-provided discriminating string in the variable names, using the value from the SELECT TYPE variable name or last component reference name. The additional string may be truncated to fit in the temporary buffer. This requires all buffers to have matching sizes to get the same resulting name everywhere. gcc/fortran/ChangeLog: * misc.cc (gfc_var_name_for_select_type_temp): New function. * gfortran.h (gfc_var_name_for_select_type_temp): Declare it. * resolve.cc (resolve_select_type): Pick a discriminating name from the SELECT TYPE variable reference and use it in the name of the temporary variable that is generated. Truncate name to the buffer size. * match.cc (select_type_set_tmp): Likewise. Pass the discriminating name... (select_intrinsic_set_tmp): ... to this function. Use the discriminating name likewise. Augment the buffer size to match that of select_type_set_tmp and resolve_select_type. gcc/testsuite/ChangeLog: * gfortran.dg/select_type_51.f90: New test. 
Diff: --- gcc/fortran/gfortran.h | 2 ++ gcc/fortran/match.cc | 24 +++--- gcc/fortran/misc.cc | 21 gcc/fortran/resolve.cc | 21 ++-- gcc/testsuite/gfortran.dg/select_type_51.f90 | 37 5 files changed, 88 insertions(+), 17 deletions(-) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index f73b5f9c23f4..6848bd1762d3 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3507,6 +3507,8 @@ void gfc_done_2 (void); int get_c_kind (const char *, CInteropKind_t *); +const char * gfc_var_name_for_select_type_temp (gfc_expr *); + const char *gfc_closest_fuzzy_match (const char *, char **); inline void vec_push (char **&optr, size_t &osz, const char *elt) diff --git a/gcc/fortran/match.cc b/gcc/fortran/match.cc index a99a757bede6..aa0b04afd563 100644 --- a/gcc/fortran/match.cc +++ b/gcc/fortran/match.cc @@ -7171,9 +7171,11 @@ select_type_push (gfc_symbol *sel) /* Set the temporary for the current intrinsic SELECT TYPE selector. */ static gfc_symtree * -select_intrinsic_set_tmp (gfc_typespec *ts) +select_intrinsic_set_tmp (gfc_typespec *ts, const char *var_name) { - char name[GFC_MAX_SYMBOL_LEN]; + /* Keep size in sync with the buffer size in resolve_select_type as it + determines the final name through truncation. 
*/ + char name[GFC_MAX_SYMBOL_LEN + 12 + 1]; gfc_symtree *tmp; HOST_WIDE_INT charlen = 0; gfc_symbol *selector = select_type_stack->selector; @@ -7192,12 +7194,12 @@ select_intrinsic_set_tmp (gfc_typespec *ts) charlen = gfc_mpz_get_hwi (ts->u.cl->length->value.integer); if (ts->type != BT_CHARACTER) -sprintf (name, "__tmp_%s_%d", gfc_basic_typename (ts->type), -ts->kind); +snprintf (name, sizeof (name), "__tmp_%s_%d_%s", + gfc_basic_typename (ts->type), ts->kind, var_name); else snprintf (name, sizeof (name), - "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d", - gfc_basic_typename (ts->type), charlen, ts->kind); + "__tmp_%s_" HOST_WIDE_INT_PRINT_DEC "_%d_%s", + gfc_basic_typename (ts->type), charlen, ts->kind, var_name); gfc_get_sym_tree (name, gfc_current_ns, &tmp, false); sym = tmp->n.sym; @@ -7239,7 +7241,9 @@ select_type_set_tmp (gfc_typespec *ts) return; } - tmp = select_intrinsic_set_tmp (ts); + gfc_expr *select_type_expr = gfc_state_stack->construct->expr1; + const char *var_name = gfc_var_name_for_select_type_temp (select_type_expr); + tmp = select_intrinsic_set_tmp (ts, var_name); if (tmp == NULL) { @@ -7247,9 +7251,11 @@ select_type_set_tmp (gfc_typespec *ts) return; if (ts->type == BT_CLASS) - sprintf (name, "__tmp_class_%s", ts->u.derived->name); + snprintf (name, sizeof (name), "__tmp_class_%s_%s", ts->u.derived->name, + var_name); else - sprintf (name, "__tmp_type_%s",
[gcc r16-1663] Remove early inlining from afdo pass
https://gcc.gnu.org/g:3fde750a29820a1ccd6dd399bdfa0cf3d97a2c30 commit r16-1663-g3fde750a29820a1ccd6dd399bdfa0cf3d97a2c30 Author: Jan Hubicka Date: Wed Jun 25 02:59:54 2025 +0200 Remove early inlining from afdo pass This patch removes early-inlining from afdo pass since all inlining should now happen from early inliner. I tested this on spec and there are 3 inlines happening here which are blocked at early-inline time by hitting large function growth limit. We probably want to bypass that limit, I will look into that incrementally. This should make the non-inlined function profile merging hopefully easier. It may still make sense to separate afdo inliner from early inliner to solve the non-transitivity issues which is not that hard to do with current code organization. However this should be separate IPA pass rather than another part of afdo pass, since it can be conceptually separate. gcc/ChangeLog: * auto-profile.cc: Update toplevel comment. (early_inline): Remove. (auto_profile): Don't do early inlining. Diff: --- gcc/auto-profile.cc | 39 +++ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 8a1d9f878c65..3f8310e6324b 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -76,21 +76,30 @@ along with GCC; see the file COPYING3. If not see standalone symbol, or a clone of a function that is inlined into another function. - Phase 2: Early inline + value profile transformation. - Early inline uses autofdo_source_profile to find if a callsite is: + Phase 2: AFDO inline + value profile transformation. + This happens during early optimization. + During early inlning AFDO inliner is executed which + uses autofdo_source_profile to find if a callsite is: * inlined in the profiled binary. * callee body is hot in the profiling run. If both condition satisfies, early inline will inline the callsite regardless of the code growth. - Phase 2 is an iterative process. 
During each iteration, we also check - if an indirect callsite is promoted and inlined in the profiling run. - If yes, vpt will happen to force promote it and in the next iteration, - einline will inline the promoted callsite in the next iteration. + + Performing this early has benefit of doing early optimizations + before read IPA passe and getting more "context sensitivity" of + the profile read. Profile of inlined functions may differ + significantly form one inline instance to another and from the + offline version. + + This is controlled by -fauto-profile-inlinig and is independent + of -fearly-inlining. Phase 3: Annotate control flow graph. AutoFDO uses a separate pass to: * Annotate basic block count * Estimate branch probability + * Use earlier static profile to fill in the gaps + if AFDO profile is ambigous After the above 3 phases, all profile is readily annotated on the GCC IR. AutoFDO tries to reuse all FDO infrastructure as much as possible to make @@ -2217,18 +2226,6 @@ afdo_annotate_cfg (void) free_dominance_info (CDI_POST_DOMINATORS); } -/* Wrapper function to invoke early inliner. */ - -static unsigned int -early_inline () -{ - compute_fn_summary (cgraph_node::get (current_function_decl), true); - unsigned int todo = early_inliner (cfun); - if (todo & TODO_update_ssa_any) -update_ssa (TODO_update_ssa); - return todo; -} - /* Use AutoFDO profile to annoate the control flow graph. Return the todo flag. */ @@ -2254,15 +2251,9 @@ auto_profile (void) push_cfun (DECL_STRUCT_FUNCTION (node->decl)); -unsigned int todo = early_inline (); autofdo::afdo_annotate_cfg (); compute_function_frequency (); -/* Local pure-const may imply need to fixup the cfg. */ -todo |= execute_fixup_cfg (); -if (todo & TODO_cleanup_cfg) - cleanup_tree_cfg (); - free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); cgraph_edge::rebuild_edges ();
[gcc] Created branch 'meissner/heads/work212-dmf' in namespace 'refs/users'
The branch 'meissner/heads/work212-dmf' was created in namespace 'refs/users' pointing to: 09be5ec3d304... Add ChangeLog.meissner and REVISION.
[gcc r16-1677] Mark rtl_avoid_store_forwarding functions final override
https://gcc.gnu.org/g:fb1ba48f557acc89f0616d330c8ef5caf613aff7 commit r16-1677-gfb1ba48f557acc89f0616d330c8ef5caf613aff7 Author: Martin Jambor Date: Wed Jun 25 16:48:44 2025 +0200 Mark rtl_avoid_store_forwarding functions final override It is customary to mark the gate and execute functions of the classes representing passes as final override but this is missing in pass_rtl_avoid_store_forwarding. This patch adds it which also silences a clang warning about it. gcc/ChangeLog: 2025-06-24 Martin Jambor * avoid-store-forwarding.cc (class pass_rtl_avoid_store_forwarding): Mark member function gate as final override. Diff: --- gcc/avoid-store-forwarding.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc index 6825d0426ecc..37e095316c93 100644 --- a/gcc/avoid-store-forwarding.cc +++ b/gcc/avoid-store-forwarding.cc @@ -80,12 +80,12 @@ public: {} /* opt_pass methods: */ - virtual bool gate (function *) + virtual bool gate (function *) final override { return flag_avoid_store_forwarding && optimize >= 1; } - virtual unsigned int execute (function *) override; + virtual unsigned int execute (function *) final override; }; // class pass_rtl_avoid_store_forwarding /* Handler for finding and avoiding store forwardings. */
[gcc r12-11227] tree-optimization/119057 - bogus double reduction detection
https://gcc.gnu.org/g:ad756e186f6352b1369c8094ec8973736142933e commit r12-11227-gad756e186f6352b1369c8094ec8973736142933e Author: Richard Biener Date: Mon Mar 3 13:21:53 2025 +0100 tree-optimization/119057 - bogus double reduction detection We are detecting a cycle as double reduction where the inner loop cycle has extra out-of-loop uses. This clashes at least with assumptions from the SLP discovery code which says the cycle isn't reachable from another SLP instance. It also was not intended to support this case, in fact with GCC 14 we seem to generate wrong code here. PR tree-optimization/119057 * tree-vect-loop.cc (check_reduction_path): Add argument specifying whether we're analyzing the inner loop of a double reduction. Do not allow extra uses outside of the double reduction cycle in this case. (vect_is_simple_reduction): Adjust. * gcc.dg/vect/pr119057.c: New testcase. (cherry picked from commit 758de6263dfc7ba8701965fa468691ac23cb7eb5) Diff: --- gcc/testsuite/gcc.dg/vect/pr119057.c | 19 +++ gcc/tree-vect-loop.cc| 12 +++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr119057.c b/gcc/testsuite/gcc.dg/vect/pr119057.c new file mode 100644 index ..582bb8ff86c3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr119057.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-tree-vrp -fno-tree-forwprop" } */ + +int a, b, c, d; +unsigned e; +static void f(void) +{ + unsigned h; + for (d = 0; d < 2; d++) +b |= e; + h = b; + c |= h; +} +int main() +{ + for (; a; a++) +f(); + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 1abc43f396ec..f46cabf22269 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3379,7 +3379,8 @@ needs_fold_left_reduction_p (tree type, code_helper code) static bool check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, tree loop_arg, code_helper *code, - vec > &path) + vec > &path, + bool inner_loop_of_double_reduc) { 
auto_bitmap visited; tree lookfor = PHI_RESULT (phi); @@ -3509,7 +3510,8 @@ pop: break; } /* Check there's only a single stmt the op is used on. For the -not value-changing tail and the last stmt allow out-of-loop uses. +not value-changing tail and the last stmt allow out-of-loop uses, +but not when this is the inner loop of a double reduction. ??? We could relax this and handle arbitrary live stmts by forcing a scalar epilogue for example. */ imm_use_iterator imm_iter; @@ -3518,7 +3520,7 @@ pop: unsigned cnt = 0; FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op.ops[opi]) if (!is_gimple_debug (op_use_stmt) - && (*code != ERROR_MARK + && ((*code != ERROR_MARK || inner_loop_of_double_reduc) || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) cnt++; @@ -3537,7 +3539,7 @@ check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, { auto_vec > path; code_helper code_; - return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) + return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path, false) && code_ == code); } @@ -3737,7 +3739,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, auto_vec > path; code_helper code; if (check_reduction_path (vect_location, loop, phi, latch_def, &code, - path)) + path, inner_loop_of_double_reduc)) { STMT_VINFO_REDUC_CODE (phi_info) = code; if (code == COND_EXPR && !nested_in_vect_loop)
[gcc r16-1690] x86: Add debug dump for the remove_redundant_vector pass
https://gcc.gnu.org/g:b8b08a8514003280050003d6d56657cb1b71fb88 commit r16-1690-gb8b08a8514003280050003d6d56657cb1b71fb88 Author: H.J. Lu Date: Sat May 10 16:57:58 2025 +0800 x86: Add debug dump for the remove_redundant_vector pass Add debug dump for the remove_redundant_vector pass with the following output: Replace: (insn 7 4 8 2 (set (reg:V2DI 103) (const_vector:V2DI [ (const_int 0 [0]) repeated x2 ])) "x.c":8:13 2406 {movv2di_internal} (nil)) with: (insn 7 4 8 2 (set (reg:V2DI 103) (subreg:V2DI (reg:V32QI 109) 0)) "x.c":8:13 2406 {movv2di_internal} (nil)) ... Replace: (insn 16 15 17 3 (set (reg:V4DI 105) (const_vector:V4DI [ (const_int 0 [0]) repeated x4 ])) "x.c":13:28 2405 {movv4di_internal} (nil)) with: (insn 16 15 17 3 (set (reg:V4DI 105) (subreg:V4DI (reg:V32QI 109) 0)) "x.c":13:28 2405 {movv4di_internal} (nil)) ... Place: (insn 25 5 23 2 (set (reg:V32QI 109) (const_vector:V32QI [ (const_int 0 [0]) repeated x32 ])) -1 (nil)) after: (insn 23 25 24 2 (set (reg/f:DI 107 [ mem1 ]) (reg:DI 5 di [ mem1 ])) "x.c":5:1 95 {*movdi_internal} (expr_list:REG_DEAD (reg:DI 5 di [ mem1 ]) (nil))) in the *.309r.rrvl debug dump. * config/i386/i386-features.cc (ix86_place_single_vector_set): Add debug dump. (replace_vector_const): Likewise. (remove_redundant_vector_load): Likewise. Signed-off-by: H.J. 
Lu Diff: --- gcc/config/i386/i386-features.cc | 65 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 36878aab9688..be2ce3103dde 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3116,10 +3116,30 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, rtx_insn *set_insn; if (insn == BB_HEAD (bb)) -set_insn = emit_insn_before (set, insn); +{ + set_insn = emit_insn_before (set, insn); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } +} else -set_insn = emit_insn_after (set, - insn ? PREV_INSN (insn) : BB_END (bb)); +{ + rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb); + set_insn = emit_insn_after (set, after); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, after); + fprintf (dump_file, "\n"); + } +} if (inner_scalar) { @@ -3129,7 +3149,15 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, && GET_MODE (reg) != GET_MODE (inner_scalar)) inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0); rtx set = gen_rtx_SET (reg, inner_scalar); - emit_insn_before (set, set_insn); + insn = emit_insn_before (set, set_insn); + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\n"); + } } } @@ -3416,7 +3444,15 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, vreg = gen_reg_rtx (vmode); rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); rtx pat = gen_rtx_SET (vreg, vsubreg); - emit_insn_before (pat, insn); + rtx_insn *vinsn = emit_insn_before (pat, insn); + if (dump_file) + 
{ + fprintf (dump_file, "\nInsert an extra move:\n\n"); + print_rtl_single (dump_file, vinsn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\n"); + } } replace = gen_rtx_SUBREG (mode, vreg, 0); } @@ -3424,11 +3460,22 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, replace = gen_rtx_SUBREG (mode, vector_const, 0); } + if (dump_file) + { + fprintf (dump_file, "\nReplace:\n\n"); + print_rtl_single (dump_file, insn); + } SET_SRC (set) = replace;
[gcc r12-11228] middle-end/119119 - re-gimplification of empty CTOR assignments
https://gcc.gnu.org/g:81ebacc70c3446218a347902362cc032fc615f6c commit r12-11228-g81ebacc70c3446218a347902362cc032fc615f6c Author: Richard Biener Date: Thu Mar 6 09:08:07 2025 +0100 middle-end/119119 - re-gimplification of empty CTOR assignments The following testcase runs into a re-gimplification issue during inlining when processing MEM[(struct e *)this_2(D)].a = {}; where re-gimplification does not handle assignments in the same way as the gimplifier but instead relies on rhs_predicate_for and gimplifying the RHS standalone. This fails to handle special-casing of CTORs. The is_gimple_mem_rhs_or_call predicate already handles clobbers but not empty CTORs so we end up in the fallback code trying to force the CTOR into a separate stmt using a temporary - but as we have a non-copyable type here that ICEs. The following generalizes empty CTORs in is_gimple_mem_rhs_or_call since those need no additional re-gimplification. PR middle-end/119119 * gimplify.cc (is_gimple_mem_rhs_or_call): All empty CTORs are OK when not a register type. * g++.dg/torture/pr11911.C: New testcase. 
(cherry picked from commit 3bd61c1dfaa2d7153eb4be82f423533ea937d0f9) Diff: --- gcc/gimplify.cc| 2 +- gcc/testsuite/g++.dg/torture/pr11911.C | 21 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index a7b5a3883373..ecbe58d2b056 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -556,7 +556,7 @@ is_gimple_mem_rhs_or_call (tree t) else return (is_gimple_val (t) || is_gimple_lvalue (t) - || TREE_CLOBBER_P (t) + || (TREE_CODE (t) == CONSTRUCTOR && CONSTRUCTOR_NELTS (t) == 0) || TREE_CODE (t) == CALL_EXPR); } diff --git a/gcc/testsuite/g++.dg/torture/pr11911.C b/gcc/testsuite/g++.dg/torture/pr11911.C new file mode 100644 index ..7dc836ff9b53 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr11911.C @@ -0,0 +1,21 @@ +// { dg-do compile } +// { dg-require-effective-target c++17 } + +struct b { + int a; +}; +struct c { + b d{}; + c() = default; + c(c &) = delete; +}; +struct e { + c a{}; + e() {} +}; +inline e f() { return {}; } +struct g { + e cx; + g() : cx{f()} {} +}; +void h() { g i; }
[gcc r16-1684] diagnostics: Mark path_label::get_effects as final override
https://gcc.gnu.org/g:6af1e7409ce1f763a0fe622e9b08b079d0797989 commit r16-1684-g6af1e7409ce1f763a0fe622e9b08b079d0797989 Author: Martin Jambor Date: Wed Jun 25 17:11:34 2025 +0200 diagnostics: Mark path_label::get_effects as final override When compiling diagnostic-path-output.cc with clang, it warns that path_label::get_effects should be marked as override. That looks like a good idea and from a brief look I also believe it should be marked as final (the other override in the class is marked as both), so this patch does that. Likewise for html_output_format::after_diagnostic in diagnostic-format-html.cc which also already has quite a few member functions marked as final override. gcc/ChangeLog: 2025-06-24 Martin Jambor * diagnostic-path-output.cc (path_label::get_effects): Mark as final override. * diagnostic-format-html.cc (html_output_format::after_diagnostic): Likewise. Diff: --- gcc/diagnostic-format-html.cc | 2 +- gcc/diagnostic-path-output.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/diagnostic-format-html.cc b/gcc/diagnostic-format-html.cc index c397c9f088d0..1f5c138bcd08 100644 --- a/gcc/diagnostic-format-html.cc +++ b/gcc/diagnostic-format-html.cc @@ -1311,7 +1311,7 @@ public: { m_builder.emit_diagram (diagram); } - void after_diagnostic (const diagnostic_info &) + void after_diagnostic (const diagnostic_info &) final override { /* No-op, but perhaps could show paths here. */ } diff --git a/gcc/diagnostic-path-output.cc b/gcc/diagnostic-path-output.cc index bae24bf01a70..4bec3a66267f 100644 --- a/gcc/diagnostic-path-output.cc +++ b/gcc/diagnostic-path-output.cc @@ -135,7 +135,7 @@ class path_label : public range_label return result; } - const label_effects *get_effects (unsigned /*range_idx*/) const + const label_effects *get_effects (unsigned /*range_idx*/) const final override { return &m_effects; }
[gcc r16-1683] ranger-op: Use CFN_ constant instead of plain BUILTIN_ one
https://gcc.gnu.org/g:a73cb702e24b6bc209ca53bcfc9cca5d95dd commit r16-1683-ga73cb702e24b6bc209ca53bcfc9cca5d95dd Author: Martin Jambor Date: Mon Jun 23 18:21:34 2025 +0200 ranger-op: Use CFN_ constant instead of plain BUILTIN_ one when compiling gimple-range-op.cc, clang issues warning: gimple-range-op.cc:1419:18: warning: comparison of different enumeration types in switch statement ('combined_fn' and 'built_in_function') [-Wenum-compare-switch] which I hope is harmless, but all other switch cases use CFN_ prefixed constants, so I guess the ISINF case should too. gcc/ChangeLog: 2025-06-23 Martin Jambor * gimple-range-op.cc (gimple_range_op_handler::maybe_builtin_call): Use CFN_BUILT_IN_ISINF instead of BUILT_IN_ISINF. Diff: --- gcc/gimple-range-op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc index 90a619714898..c9bc5c0c6b90 100644 --- a/gcc/gimple-range-op.cc +++ b/gcc/gimple-range-op.cc @@ -1416,7 +1416,7 @@ gimple_range_op_handler::maybe_builtin_call () m_operator = &op_cfn_signbit; break; -CASE_FLT_FN (BUILT_IN_ISINF): +CASE_FLT_FN (CFN_BUILT_IN_ISINF): m_op1 = gimple_call_arg (call, 0); m_operator = &op_cfn_isinf; break;
[gcc r15-9862] [RISC-V] Fix ICE due to splitter emitting constant loads directly
https://gcc.gnu.org/g:f48873c57f717a844fd009f5f3682e0a0a1b40b7 commit r15-9862-gf48873c57f717a844fd009f5f3682e0a0a1b40b7 Author: Jeff Law Date: Tue Jun 10 06:38:52 2025 -0600 [RISC-V] Fix ICE due to splitter emitting constant loads directly This is a fix for a bug found internally in Ventana using the cf3 testsuite. cf3 looks to be dead as a project and likely subsumed by modern fuzzers. In fact internally we tripped another issue with cf3 that had already been reported by Edwin with the fuzzer he runs. Anyway, the splitter in question blindly emits the 2nd adjusted constant into a register, that's not valid if the constant requires any kind of synthesis -- and it well could since we're mostly focused on the first constant turning into something that can be loaded via LUI without increasing the cost of the second constant. Instead of using the split RTL template, this just emits the code we want directly, using riscv_move_insn to synthesize the constant into the provided temporary register. Tested in my system. Waiting on upstream CI's verdict before moving forward. gcc/ * config/riscv/riscv.md (lui-constraintand_to_or): Do not use the RTL template for split code. Emit it directly taking care to avoid emitting a constant load that needed synthesis. Fix formatting. gcc/testsuite/ * gcc.target/riscv/ventana-16122.c: New test. 
(cherry picked from commit b93d8873cda88f0892c7782b274904fa8d3751fb) Diff: --- gcc/config/riscv/riscv.md | 18 +- gcc/testsuite/gcc.target/riscv/ventana-16122.c | 19 +++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index c551a1fa4e27..32c8e7147a30 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -875,7 +875,7 @@ ;; Where C1 is not a LUI operand, but ~C1 is a LUI operand (define_insn_and_split "*lui_constraint_and_to_or" - [(set (match_operand:X 0 "register_operand" "=r") + [(set (match_operand:X 0 "register_operand" "=r") (plus:X (and:X (match_operand:X 1 "register_operand" "r") (match_operand 2 "const_int_operand")) (match_operand 3 "const_int_operand"))) @@ -889,13 +889,21 @@ <= riscv_const_insns (operands[3], false)))" "#" "&& reload_completed" - [(set (match_dup 4) (match_dup 5)) - (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4))) - (set (match_dup 4) (match_dup 6)) - (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))] + [(const_int 0)] { operands[5] = GEN_INT (~INTVAL (operands[2])); operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3]))); + +/* This is always a LUI operand, so it's safe to just emit. */ +emit_move_insn (operands[4], operands[5]); + +rtx x = gen_rtx_IOR (word_mode, operands[1], operands[4]); +emit_move_insn (operands[0], x); + +/* This may require multiple steps to synthesize. 
*/ +riscv_emit_move (operands[4], operands[6]); +x = gen_rtx_MINUS (word_mode, operands[0], operands[4]); +emit_move_insn (operands[0], x); } [(set_attr "type" "arith")]) diff --git a/gcc/testsuite/gcc.target/riscv/ventana-16122.c b/gcc/testsuite/gcc.target/riscv/ventana-16122.c new file mode 100644 index ..59e6467b57c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/ventana-16122.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target { rv64 } } } */ + +extern void NG (void); +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +void f74(void) { + int16_t x309 = 0x7fff; + volatile int32_t x310 = 0x7fff; + int8_t x311 = 59; + int16_t x312 = -0x8000; + static volatile int32_t t74 = 614992577; + +t74 = (x309==((x310^x311)%x312)); + +if (t74 != 0) { NG(); } else { ; } + +} +
[gcc r16-1689] arc: Use intrinsics for __builtin_mul_overflow ()
https://gcc.gnu.org/g:5fd8f336f17067fa3ae25eb69f50ca45171ceec8 commit r16-1689-g5fd8f336f17067fa3ae25eb69f50ca45171ceec8 Author: Luis Silva Date: Wed Jun 25 17:58:35 2025 +0300 arc: Use intrinsics for __builtin_mul_overflow () This patch handles both signed and unsigned builtin multiplication overflow. Uses the "mpy.f" instruction to set the condition codes based on the result. In the event of an overflow, the V flag is set, triggering a conditional move depending on the V flag status. For example, set "1" to "r0" in case of overflow: mov_s r0,1 mpy.f r0,r0,r1 j_s.d [blink] mov.nv r0,0 gcc/ChangeLog: * config/arc/arc.md (mulvsi4): New define_expand. (mulsi3_Vcmp): New define_insn. Signed-off-by: Luis Silva Diff: --- gcc/config/arc/arc.md | 33 + 1 file changed, 33 insertions(+) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 96921207cc41..d119464176b8 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -842,6 +842,9 @@ archs4x, archs4xd" ; Optab prefix for sign/zero-extending operations (define_code_attr su_optab [(sign_extend "") (zero_extend "u")]) +;; Code iterator for sign/zero extension +(define_code_iterator ANY_EXTEND [sign_extend zero_extend]) + (define_insn "*xt_cmp0_noout" [(set (match_operand 0 "cc_set_register" "") (compare:CC_ZN (SEZ:SI (match_operand:SQH 1 "register_operand" "r")) @@ -1068,6 +1071,36 @@ archs4x, archs4xd" (set_attr "cond" "set_zn") (set_attr "length" "*,4,4,4,8")]) +(define_expand "mulvsi4" + [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand")) + (label_ref (match_operand 3 "" ""))] + "TARGET_MPY" + { +emit_insn (gen_mulsi3_Vcmp (operands[0], operands[1], + operands[2])); +arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); +DONE; + }) + +(define_insn "mulsi3_Vcmp" + [(parallel +[(set + (reg:CC_V CC_REG) + (compare:CC_V + (mult:DI + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" 
"%0,r,r,r")) + (ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand" "I,L,r,C32"))) + (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2) + (set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (mult:SI (match_dup 1) (match_dup 2)))])] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "mpy.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "multi")]) + (define_insn "*mulsi3_cmp0" [(set (reg:CC_Z CC_REG) (compare:CC_Z
[gcc(refs/users/mikael/heads/unwrap_non_lvalue_v07)] match: Unwrap non-lvalue as unary or binary operand
https://gcc.gnu.org/g:05bb1a71fb052d9145c010e4431d8bc01fb44115 commit 05bb1a71fb052d9145c010e4431d8bc01fb44115 Author: Mikael Morin Date: Thu Jul 4 15:24:36 2024 +0200 match: Unwrap non-lvalue as unary or binary operand Regression tested on x86_64-linux. OK for master? -- 8< -- This change makes the binary and unary folding functions return a tree whose operands have their non-lvalue wrapper stripped (if they had one). It only makes a difference if the function hasn't found any simplification and would return NULL_TREE. It moves all early NULL_TREE return to the end of the function where a last resort common simplification attempt is made. That attempt checks whether the implicit simplifications contained in the stripped operands are worth keeping, and returns a new tree based on the stripped operands in that case. The testcases are best effort; for some operators the fortran frontend generates a temporary variable, so the simplification doesn't happen. Those cases are not tested. gcc/ChangeLog: * fold-const.cc (maybe_lvalue_p): New overload, split from the original function. (stripped_converted_equals_original): New predicate function. (fold_binary_loc): Check whether the initial stripping of operands was a simplification worth keeping before returning NULL_TREE, and rebuild a new tree based on the stripped operands in that case. (fold_unary_loc): Likewise. gcc/testsuite/ChangeLog: * gfortran.dg/non_lvalue_2.f90: New test. * gfortran.dg/non_lvalue_3.f90: New test. Diff: --- gcc/fold-const.cc | 228 ++--- gcc/testsuite/gfortran.dg/non_lvalue_2.f90 | 58 gcc/testsuite/gfortran.dg/non_lvalue_3.f90 | 172 ++ 3 files changed, 375 insertions(+), 83 deletions(-) diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 014f42187932..867ef7017b0d 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -2790,56 +2790,65 @@ fold_convert_loc (location_t loc, tree type, tree arg) return tem; } -/* Return false if expr can be assumed not to be an lvalue, true - otherwise. 
*/ +/* Return false if a tree whose code is CODE can be assumed not to represent an + lvalue, true otherwise. */ static bool -maybe_lvalue_p (const_tree x) +maybe_lvalue_p (enum tree_code code) { /* We only need to wrap lvalue tree codes. */ - switch (TREE_CODE (x)) - { - case VAR_DECL: - case PARM_DECL: - case RESULT_DECL: - case LABEL_DECL: - case FUNCTION_DECL: - case SSA_NAME: - case COMPOUND_LITERAL_EXPR: - - case COMPONENT_REF: - case MEM_REF: - case INDIRECT_REF: - case ARRAY_REF: - case ARRAY_RANGE_REF: - case BIT_FIELD_REF: - case OBJ_TYPE_REF: - - case REALPART_EXPR: - case IMAGPART_EXPR: - case PREINCREMENT_EXPR: - case PREDECREMENT_EXPR: - case SAVE_EXPR: - case TRY_CATCH_EXPR: - case WITH_CLEANUP_EXPR: - case COMPOUND_EXPR: - case MODIFY_EXPR: - case TARGET_EXPR: - case COND_EXPR: - case BIND_EXPR: - case VIEW_CONVERT_EXPR: -break; + switch (code) +{ +case VAR_DECL: +case PARM_DECL: +case RESULT_DECL: +case LABEL_DECL: +case FUNCTION_DECL: +case SSA_NAME: +case COMPOUND_LITERAL_EXPR: + +case COMPONENT_REF: +case MEM_REF: +case INDIRECT_REF: +case ARRAY_REF: +case ARRAY_RANGE_REF: +case BIT_FIELD_REF: +case OBJ_TYPE_REF: - default: -/* Assume the worst for front-end tree codes. */ -if ((int)TREE_CODE (x) >= NUM_TREE_CODES) +case REALPART_EXPR: +case IMAGPART_EXPR: +case PREINCREMENT_EXPR: +case PREDECREMENT_EXPR: +case SAVE_EXPR: +case TRY_CATCH_EXPR: +case WITH_CLEANUP_EXPR: +case COMPOUND_EXPR: +case MODIFY_EXPR: +case TARGET_EXPR: +case COND_EXPR: +case BIND_EXPR: +case VIEW_CONVERT_EXPR: break; -return false; - } + +default: + /* Assume the worst for front-end tree codes. */ + if ((int)code >= NUM_TREE_CODES) + break; + return false; +} return true; } +/* Return false if expr can be assumed not to be an lvalue, true + otherwise. */ + +static bool +maybe_lvalue_p (const_tree x) +{ + return maybe_lvalue_p (TREE_CODE (x)); +} + /* Return an expr equal to X but certainly not valid as an lvalue. 
*/ tree @@ -9181,6 +9190,21 @@ build_fold_addr_expr_loc (location_t loc, tree t) return build_fold_addr_expr_with_type_loc (loc, t, ptrtype); } +/* Tells whether tree ORIGINAL is equal to what would be produced if + converting its nop-stripped subtree STRIPPED to its type. */ + +static inline bool +stripped_converted_equals_original (const_tree stripped, const_tree original) +{ + if (stripped == original) +return true; + + if (TREE_TYPE (stripped) == TREE_TYPE (original)) +return fal
[gcc] Created branch 'mikael/heads/unwrap_non_lvalue_v07' in namespace 'refs/users'
The branch 'mikael/heads/unwrap_non_lvalue_v07' was created in namespace 'refs/users' pointing to: 05bb1a71fb05... match: Unwrap non-lvalue as unary or binary operand
[gcc r16-1686] ARC: Use intrinsics for __builtin_sub_overflow*()
https://gcc.gnu.org/g:b9361e5bcbdf08ad6279d58ca63c4a260d34fcaf commit r16-1686-gb9361e5bcbdf08ad6279d58ca63c4a260d34fcaf Author: Shahab Vahedi Date: Wed Jun 25 17:37:02 2025 +0300 ARC: Use intrinsics for __builtin_sub_overflow*() This patch covers signed and unsigned subtractions. The generated code would be something along these lines: signed: sub.f r0, r1, r2 b.v @label unsigned: sub.f r0, r1, r2 b.c @label gcc/ * config/arc/arc.md (subsi3_v, subvsi4, subsi3_c): New patterns. gcc/testsuite/ * gcc.target/arc/overflow-2.c: New file. Diff: --- gcc/config/arc/arc.md | 48 +++ gcc/testsuite/gcc.target/arc/overflow-2.c | 97 +++ 2 files changed, 145 insertions(+) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index c81a7f19d228..2394eb8c001e 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2964,6 +2964,54 @@ archs4x, archs4xd" (set_attr "cpu_facility" "*,cd,*,*,*,*,*,*,*,*") ]) +(define_insn "subsi3_v" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_V CC_REG) + (compare:CC_V (sign_extend:DI (minus:SI (match_dup 1) + (match_dup 2))) + (minus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2)] + "" + "sub.f\\t%0,%1,%2" + [(set_attr "cond" "set") +(set_attr "type" "compare") +(set_attr "length" "4,4,4,8")]) + +(define_expand "subvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_subsi3_v (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE;") + +(define_insn "subsi3_c" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand""r,L,I,C32"))) + (set (reg:CC_C CC_REG) + (compare:CC_C 
(match_dup 1) + (match_dup 2)))] + "" + "sub.f\\t%0,%1,%2" + [(set_attr "cond" "set") +(set_attr "type" "compare") +(set_attr "length" "4,4,4,8")]) + +(define_expand "usubvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_subsi3_c (operands[0], operands[1], operands[2])); +arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); +DONE;") + (define_expand "subdi3" [(set (match_operand:DI 0 "register_operand" "") (minus:DI (match_operand:DI 1 "register_operand" "") diff --git a/gcc/testsuite/gcc.target/arc/overflow-2.c b/gcc/testsuite/gcc.target/arc/overflow-2.c new file mode 100644 index ..b4de8c03b228 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/overflow-2.c @@ -0,0 +1,97 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include +#include + +/* + * sub.f r0,r0,r1 + * st_s r0,[r2] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool sub_overflow (int32_t a, int32_t b, int32_t *res) +{ + return __builtin_sub_overflow (a, b, res); +} + +/* + * sub.f r0,r0,-1234 + * st_s r0,[r1] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool subi_overflow (int32_t a, int32_t *res) +{ + return __builtin_sub_overflow (a, -1234, res); +} + +/* + * sub.f r3,r0,r1 + * st_s r3,[r2] + * j_s.d [blink] + * setlo r0,r0,r1 + */ +bool usub_overflow (uint32_t a, uint32_t b, uint32_t *res) +{ + return __builtin_sub_overflow (a, b, res); +} + +/* + * sub.f r2,r0,4321 + * seths r0,4320,r0 + * j_s.d [blink] + * st_s r2,[r1] + */ +bool usubi_overflow (uint32_t a, uint32_t *res) +{ + return __builtin_sub_overflow (a, 4321, res); +} + +/* + * sub.f r0,r0,r1 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool sub_overflow_p (int32_t a, int32_t b, int32_t res) +{ + return __builtin_sub_overflow_p (a, b, res); +} + +/* + * sub.f r0,r0,-1000 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool subi_overflow_p (int32_t a, 
int32_t res) +{ + return __builtin_sub_overflow_p (a, -1000, res); +} + +/* + * j_s.d [blink] + * setlo r0,r0,r1 + */ +bool usub_overflow_p (uint32_t a, uint32_t b, uint32_t res) +{ + return __builtin_sub_overflow_p (a, b, res); +} + +/* + * seths r0,1999,r0 + * j_s.d [blink] + */ +bool usubi_overflow_p (uint32_t a, uint32_t res) +{ + return __builtin_sub_overflow_p (a, 2000, res); +} + +/* { dg-final { scan-assembler-times "sub.f\\s\+" 6 } } */ +/*
[gcc r16-1687] arc: testsuite: Scan rlc instead of mov.hs
https://gcc.gnu.org/g:913d8cbcd568bb685be23e5ead157fe9c4e48f0c commit r16-1687-g913d8cbcd568bb685be23e5ead157fe9c4e48f0c Author: Luis Silva Date: Wed Jun 25 17:45:37 2025 +0300 arc: testsuite: Scan rlc instead of mov.hs Due to the patch by Roger Sayle, 09881218137f4af9b7c894c2d350cf2ff8e0ee23, which introduces the use of the `rlc rX,0` instruction in place of the `mov.hs`, the add overflow test case needs to be updated. The previous test case was validating the `mov.hs` instruction, but now it must validate the `rlc` instruction as the new behavior. gcc/testsuite/ChangeLog: * gcc.target/arc/overflow-1.c: Replace mov.hs with rlc. Signed-off-by: Luis Silva Diff: --- gcc/testsuite/gcc.target/arc/overflow-1.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/arc/overflow-1.c b/gcc/testsuite/gcc.target/arc/overflow-1.c index 01b3e8ad0fab..cf1d0d0902c6 100644 --- a/gcc/testsuite/gcc.target/arc/overflow-1.c +++ b/gcc/testsuite/gcc.target/arc/overflow-1.c @@ -31,9 +31,8 @@ bool addi_overflow (int32_t a, int32_t *res) /* * add.f r0,r0,r1 * st_s r0,[r2] - * mov_s r0,1 * j_s.d [blink] - * mov.hs r0,0 + * rlc r0,0 */ bool uadd_overflow (uint32_t a, uint32_t b, uint32_t *res) { @@ -75,9 +74,8 @@ bool addi_overflow_p (int32_t a, int32_t res) /* * add.f 0,r0,r1 - * mov_s r0,1 * j_s.d [blink] - * mov.hs r0,0 + * rlc r0,0 */ bool uadd_overflow_p (uint32_t a, uint32_t b, uint32_t res) { @@ -95,6 +93,6 @@ bool uaddi_overflow_p (uint32_t a, uint32_t res) /* { dg-final { scan-assembler-times "add.f\\s\+" 7 } } */ /* { dg-final { scan-assembler-times "mov\.nv\\s\+" 4 } } */ -/* { dg-final { scan-assembler-times "mov\.hs\\s\+" 2 } } */ +/* { dg-final { scan-assembler-times "rlc\\s\+" 2 } } */ /* { dg-final { scan-assembler-times "seths\\s\+" 2 } } */ /* { dg-final { scan-assembler-not "cmp" } } */