https://gcc.gnu.org/g:51761b3b8b98e1b9ca02ae293de00644da83b85d
commit r14-11236-g51761b3b8b98e1b9ca02ae293de00644da83b85d Author: Richard Sandiford <richard.sandif...@arm.com> Date: Wed Jan 22 15:23:54 2025 +0000 aarch64: Detect word-level modification in early-ra [PR118184] REGMODE_NATURAL_SIZE is set to 64 bits for everything except VLA SVE modes. This means that it's possible to modify (say) the highpart of a TI pseudo or a V2DI pseudo independently of the lowpart. Modifying such highparts requires a reload if the highpart ends up in the upper 64 bits of an FPR, since RTL semantics do not allow the highpart of a single hard register to be modified independently of the lowpart. early-ra missed a check for this case, which meant that it effectively treated an assignment to (subreg:DI (reg:TI R) 0) as an assignment to the whole of R. gcc/ PR target/118184 * config/aarch64/aarch64-early-ra.cc (allocno_assignment_is_rmw): New function. (early_ra::record_insn_refs): Mark the live range information as untrustworthy if an assignment would change part of an allocno but preserve the rest. gcc/testsuite/ * gcc.dg/torture/pr118184.c: New test. 
Diff: --- gcc/config/aarch64/aarch64-early-ra.cc | 51 ++++++++++++++++++++++++++++++++- gcc/testsuite/gcc.dg/torture/pr118184.c | 36 +++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc index 1e2c823cb2eb..dd2ed762f8ac 100644 --- a/gcc/config/aarch64/aarch64-early-ra.cc +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -1933,6 +1933,43 @@ early_ra::record_artificial_refs (unsigned int flags) m_current_point += 1; } +// Return true if: +// +// - X is a SUBREG, in which case it is a SUBREG of some REG Y +// +// - one 64-bit word of Y can be modified while preserving all other words +// +// - X refers to no more than one 64-bit word of Y +// +// - assigning FPRs to Y would put more than one 64-bit word in each FPR +// +// For example, this is true of: +// +// - (subreg:DI (reg:TI R) 0) and +// - (subreg:DI (reg:TI R) 8) +// +// but is not true of: +// +// - (subreg:V2SI (reg:V2x2SI R) 0) or +// - (subreg:V2SI (reg:V2x2SI R) 8). +static bool +allocno_assignment_is_rmw (rtx x) +{ + if (partial_subreg_p (x)) + { + auto outer_mode = GET_MODE (x); + auto inner_mode = GET_MODE (SUBREG_REG (x)); + if (known_eq (REGMODE_NATURAL_SIZE (inner_mode), 0U + UNITS_PER_WORD) + && known_lt (GET_MODE_SIZE (outer_mode), UNITS_PER_VREG)) + { + auto nregs = targetm.hard_regno_nregs (V0_REGNUM, inner_mode); + if (maybe_ne (nregs * UNITS_PER_WORD, GET_MODE_SIZE (inner_mode))) + return true; + } + } + return false; +} + // Model the register references in INSN as part of a backwards walk. 
void early_ra::record_insn_refs (rtx_insn *insn) @@ -1945,9 +1982,21 @@ early_ra::record_insn_refs (rtx_insn *insn) record_fpr_def (DF_REF_REGNO (ref)); else { - auto range = get_allocno_subgroup (DF_REF_REG (ref)); + rtx reg = DF_REF_REG (ref); + auto range = get_allocno_subgroup (reg); for (auto &allocno : range.allocnos ()) { + // Make sure that assigning to the DF_REF_REG clobbers the + // whole of this allocno, not just some of it. + if (allocno_assignment_is_rmw (reg)) + { + m_allocation_successful = false; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "read-modify-write of allocno %d", + allocno.id); + break; + } + // If the destination is unused, record a momentary blip // in its live range. if (!bitmap_bit_p (m_live_allocnos, allocno.id)) diff --git a/gcc/testsuite/gcc.dg/torture/pr118184.c b/gcc/testsuite/gcc.dg/torture/pr118184.c new file mode 100644 index 000000000000..5933e2a12221 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr118184.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { float128 && lp64 } } } */ + +union u1 +{ + _Float128 ld; + unsigned long l[2]; +}; + +[[gnu::noipa]] +unsigned long m() +{ + return 1000; +} + +[[gnu::noinline]] +_Float128 f(void) +{ + union u1 u; + u.ld = __builtin_nanf128(""); + u.l[0] = m(); + return u.ld; +} + +int main() +{ + union u1 u; + u.ld = f(); + union u1 u2; + u2.ld = __builtin_nanf128(""); + u2.l[0] = m(); + if (u.l[0] != u2.l[0]) + __builtin_abort(); + if (u.l[1] != u2.l[1]) + __builtin_abort(); + return 0; +}