[gcc r15-330] [PR114810][LRA]: Recognize alternatives with lack of available registers for insn and demote them.

2024-05-08 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:2f00e6caca1a14dfe26e94f608e9d79a787ebe08

commit r15-330-g2f00e6caca1a14dfe26e94f608e9d79a787ebe08
Author: Vladimir N. Makarov 
Date:   Wed May 8 10:39:04 2024 -0400

[PR114810][LRA]: Recognize alternatives with lack of available registers 
for insn and demote them.

  PR114810 was fixed in a machine-dependent way.  This patch is a fix of
the PR on the LRA side.  LRA chose an alternative with constraints `&r,r,ro`
on i686 when all operands are of DImode and there are only 6 available
general regs.  The patch recognizes such a case and significantly
increases the alternative cost.  It does not reject the alternative
completely.  So the fix is safe but it might not work for all
potentially possible cases of register shortage as register classes can
have any relations including subsets and intersections.

gcc/ChangeLog:

PR target/114810
* lra-constraints.cc (process_alt_operands): Calculate union reg
class for the alternative, peak matched regs and required reload
regs.  Recognize alternatives with lack of available registers and
make them costly.  Add debug print about this case.

Diff:
---
 gcc/lra-constraints.cc | 43 +--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 10e3d4e40977..5b78fd0b7e5c 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2127,6 +2127,8 @@ process_alt_operands (int only_alternative)
   /* Numbers of operands which are early clobber registers.  */
   int early_clobbered_nops[MAX_RECOG_OPERANDS];
   enum reg_class curr_alt[MAX_RECOG_OPERANDS];
+  enum reg_class all_this_alternative;
+  int all_used_nregs, all_reload_nregs;
   HARD_REG_SET curr_alt_set[MAX_RECOG_OPERANDS];
   HARD_REG_SET curr_alt_exclude_start_hard_regs[MAX_RECOG_OPERANDS];
   bool curr_alt_match_win[MAX_RECOG_OPERANDS];
@@ -2229,7 +2231,8 @@ process_alt_operands (int only_alternative)
   curr_alt_out_sp_reload_p = false;
   curr_reuse_alt_p = true;
   curr_alt_class_change_p = false;
-  
+  all_this_alternative = NO_REGS;
+  all_used_nregs = all_reload_nregs = 0;
   for (nop = 0; nop < n_operands; nop++)
{
  const char *p;
@@ -2660,6 +2663,15 @@ process_alt_operands (int only_alternative)
  /* Record which operands fit this alternative.  */
  if (win)
{
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_used_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
  this_alternative_win = true;
  if (class_change_p)
{
@@ -2781,7 +2793,19 @@ process_alt_operands (int only_alternative)
   & ~((ira_prohibited_class_mode_regs
[this_alternative][mode])
   | lra_no_alloc_regs));
- if (hard_reg_set_empty_p (available_regs))
+ if (!hard_reg_set_empty_p (available_regs))
+   {
+ if (early_clobber_p
+ || curr_static_id->operand[nop].type != OP_OUT)
+   {
+ all_reload_nregs
+   += ira_reg_class_min_nregs[this_alternative][mode];
+ all_this_alternative
+   = (reg_class_subunion
+  [all_this_alternative][this_alternative]);
+   }
+   }
+ else
{
  /* There are no hard regs holding a value of given
 mode.  */
@@ -3217,6 +3241,21 @@ process_alt_operands (int only_alternative)
 "Cycle danger: overall += LRA_MAX_REJECT\n");
  overall += LRA_MAX_REJECT;
}
+  if (all_this_alternative != NO_REGS
+ && all_used_nregs != 0 && all_reload_nregs != 0
+ && (all_used_nregs + all_reload_nregs + 1
+ >= ira_class_hard_regs_num[all_this_alternative]))
+   {
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "Register starvation: overall += LRA_MAX_REJECT"
+  "(class=%s,avail=%d,used=%d,reload=%d)\n",
+  reg_class_names[all_this_alternative],
+  ira_class_hard_regs_num[all_this_alternative],
+  all_used_nregs, all_reload_nregs);
+ overall += LRA_MAX_REJECT;
+   }
   ok_p = true;
   curr_alt_dont_inherit_ops_num = 0;
   for (nop = 0; nop < early_clobbered_regs_num; nop++)


[gcc r13-8740] [PR114415][scheduler]: Fixing wrong code generation

2024-05-09 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:e30211cb0b3a2b88959e9bc40626a17461de52de

commit r13-8740-ge30211cb0b3a2b88959e9bc40626a17461de52de
Author: Vladimir N. Makarov 
Date:   Thu Apr 4 16:04:04 2024 -0400

[PR114415][scheduler]: Fixing wrong code generation

  For the test case, the insn scheduler (working for live range
shrinkage) moves insns modifying stack memory before an insn reserving
the stack memory. Comments in the patch contain more details about
the problem and its solution.

gcc/ChangeLog:

PR rtl-optimization/114415
* sched-deps.cc (add_insn_mem_dependence): Add memory check for mem 
argument.
(sched_analyze_1): Treat stack pointer modification as memory read.
(sched_analyze_2, sched_analyze_insn): Add memory guard for 
processing pending_read_mems.
* sched-int.h (deps_desc): Add comment to pending_read_mems.

gcc/testsuite/ChangeLog:

PR rtl-optimization/114415
* gcc.target/i386/pr114415.c: New test.

Diff:
---
 gcc/sched-deps.cc| 49 +---
 gcc/sched-int.h  |  4 ++-
 gcc/testsuite/gcc.target/i386/pr114415.c | 47 ++
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
index 2aa6623ad2ea..2104895f3009 100644
--- a/gcc/sched-deps.cc
+++ b/gcc/sched-deps.cc
@@ -1735,7 +1735,7 @@ add_insn_mem_dependence (class deps_desc *deps, bool 
read_p,
   insn_node = alloc_INSN_LIST (insn, *insn_list);
   *insn_list = insn_node;
 
-  if (sched_deps_info->use_cselib)
+  if (sched_deps_info->use_cselib && MEM_P (mem))
 {
   mem = shallow_copy_rtx (mem);
   XEXP (mem, 0) = cselib_subst_to_values_from_insn (XEXP (mem, 0),
@@ -2458,6 +2458,25 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
   FIRST_STACK_REG);
}
 #endif
+  if (!deps->readonly && regno == STACK_POINTER_REGNUM)
+   {
+ /* Please see PR114415.  We have insn modifying memory on the stack
+and not addressed by stack pointer and we have insn reserving the
+stack space.  If we move the insn modifying memory before insn
+reserving the stack space, we can change memory out of the red
+zone.  Even worse, some optimizations (e.g. peephole) can add
+insns using temporary stack slots before insn reserving the stack
+space but after the insn modifying memory.  This will corrupt the
+modified memory.  Therefore we treat insn changing the stack as
+reading unknown memory.  This will create anti-dependence.  We
+don't need to treat the insn as writing memory because GCC by
+itself does not generate code reading undefined stack memory.  */
+ if ((deps->pending_read_list_length + deps->pending_write_list_length)
+ >= param_max_pending_list_length
+ && !DEBUG_INSN_P (insn))
+   flush_pending_lists (deps, insn, true, true);
+ add_insn_mem_dependence (deps, true, insn, dest);
+   }
 }
   else if (MEM_P (dest))
 {
@@ -2498,10 +2517,11 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
  pending_mem = deps->pending_read_mems;
  while (pending)
{
- if (anti_dependence (pending_mem->element (), t)
- && ! sched_insns_conditions_mutex_p (insn, pending->insn ()))
-   note_mem_dep (t, pending_mem->element (), pending->insn (),
- DEP_ANTI);
+ rtx mem = pending_mem->element ();
+ if (REG_P (mem)
+ || (anti_dependence (mem, t)
+ && ! sched_insns_conditions_mutex_p (insn, pending->insn 
())))
+   note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
  pending = pending->next ();
  pending_mem = pending_mem->next ();
@@ -2637,12 +2657,10 @@ sched_analyze_2 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
pending_mem = deps->pending_read_mems;
while (pending)
  {
-   if (read_dependence (pending_mem->element (), t)
-   && ! sched_insns_conditions_mutex_p (insn,
-pending->insn ()))
- note_mem_dep (t, pending_mem->element (),
-   pending->insn (),
-   DEP_ANTI);
+   rtx mem = pending_mem->element ();
+   if (MEM_P (mem) && read_dependence (mem, t)
+   && ! sched_insns_conditions_mutex_p (insn, pending->insn 
()))
+ note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
pending = pending->next ();
pending_mem = pending_mem->next ();
@@ -3026,8 +3044,7 @@ sched_analyze_insn (class deps_de

[gcc r15-364] [PR114942][LRA]: Don't reuse input reload reg of inout early clobber operand

2024-05-10 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:9585317f0715699197b1313bbf939c6ea3c1ace6

commit r15-364-g9585317f0715699197b1313bbf939c6ea3c1ace6
Author: Vladimir N. Makarov 
Date:   Fri May 10 09:15:50 2024 -0400

[PR114942][LRA]: Don't reuse input reload reg of inout early clobber operand

  The insn in question has the same reg in inout operand and input
operand.  The inout operand is early clobber.  LRA reused input reload
reg of the inout operand for the input operand, which is wrong.  It
would be a good decision if the inout operand were not an early clobber one.
The patch rejects the reuse for the PR test case.

gcc/ChangeLog:

PR target/114942
* lra-constraints.cc (struct input_reload): Add new member 
early_clobber_p.
(get_reload_reg): Add new arg early_clobber_p, don't reuse input
reload with true early_clobber_p member value, use the arg for new
element of curr_insn_input_reloads.
(match_reload): Assign false to early_clobber_p member.
(process_addr_reg, simplify_operand_subreg, curr_insn_transform):
Adjust get_reload_reg calls.

gcc/testsuite/ChangeLog:

PR target/114942
* gcc.target/i386/pr114942.c: New.

Diff:
---
 gcc/lra-constraints.cc   | 27 +++
 gcc/testsuite/gcc.target/i386/pr114942.c | 24 
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 5b78fd0b7e5c..e945a4da4519 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -599,6 +599,8 @@ struct input_reload
 {
   /* True for input reload of matched operands.  */
   bool match_p;
+  /* True for input reload of inout earlyclobber operand.  */
+  bool early_clobber_p;
   /* Reloaded value.  */
   rtx input;
   /* Reload pseudo used.  */
@@ -649,13 +651,15 @@ canonicalize_reload_addr (rtx addr)
 /* Create a new pseudo using MODE, RCLASS, EXCLUDE_START_HARD_REGS, ORIGINAL or
reuse an existing reload pseudo.  Don't reuse an existing reload pseudo if
IN_SUBREG_P is true and the reused pseudo should be wrapped up in a SUBREG.
+   EARLY_CLOBBER_P is true for input reload of inout early clobber operand.
The result pseudo is returned through RESULT_REG.  Return TRUE if we created
a new pseudo, FALSE if we reused an existing reload pseudo.  Use TITLE to
describe new registers for debug purposes.  */
 static bool
 get_reload_reg (enum op_type type, machine_mode mode, rtx original,
enum reg_class rclass, HARD_REG_SET *exclude_start_hard_regs,
-   bool in_subreg_p, const char *title, rtx *result_reg)
+   bool in_subreg_p, bool early_clobber_p,
+   const char *title, rtx *result_reg)
 {
   int i, regno;
   enum reg_class new_class;
@@ -703,6 +707,7 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
 for (i = 0; i < curr_insn_input_reloads_num; i++)
   {
if (! curr_insn_input_reloads[i].match_p
+   && ! curr_insn_input_reloads[i].early_clobber_p
&& rtx_equal_p (curr_insn_input_reloads[i].input, original)
&& in_class_p (curr_insn_input_reloads[i].reg, rclass, &new_class))
  {
@@ -750,6 +755,8 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
   lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
   curr_insn_input_reloads[curr_insn_input_reloads_num].input = original;
   curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = false;
+  curr_insn_input_reloads[curr_insn_input_reloads_num].early_clobber_p
+= early_clobber_p;
   curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = *result_reg;
   return true;
 }
@@ -1189,6 +1196,7 @@ match_reload (signed char out, signed char *ins, signed 
char *outs,
   lra_assert (curr_insn_input_reloads_num < LRA_MAX_INSN_RELOADS);
   curr_insn_input_reloads[curr_insn_input_reloads_num].input = in_rtx;
   curr_insn_input_reloads[curr_insn_input_reloads_num].match_p = true;
+  curr_insn_input_reloads[curr_insn_input_reloads_num].early_clobber_p = false;
   curr_insn_input_reloads[curr_insn_input_reloads_num++].reg = new_in_reg;
   for (i = 0; (in = ins[i]) >= 0; i++)
 if (GET_MODE (*curr_id->operand_loc[in]) == VOIDmode
@@ -1577,7 +1585,7 @@ process_addr_reg (rtx *loc, bool check_only_p, rtx_insn 
**before, rtx_insn **aft
  reg = *loc;
  if (get_reload_reg (after == NULL ? OP_IN : OP_INOUT,
  mode, reg, cl, NULL,
- subreg_p, "address", &new_reg))
+ subreg_p, false, "address", &new_reg))
before_p = true;
}
   else if (new_class != NO_REGS && rclass != new_class)
@@ -1733,7 +1741,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
= (enum reg_class) targetm.preferred_reload_class (reg, ALL_REGS);

[gcc r15-436] [PR115013][LRA]: Modify register starvation recognition

2024-05-13 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:44e7855e4e817a7f5a1e332cd95e780e57052dba

commit r15-436-g44e7855e4e817a7f5a1e332cd95e780e57052dba
Author: Vladimir N. Makarov 
Date:   Mon May 13 10:12:11 2024 -0400

[PR115013][LRA]: Modify register starvation recognition

  My recent patch to recognize reg starvation resulted in a few GCC test
failures.  The following patch fixes this by using more accurate
starvation calculation and ignoring small reg classes.

gcc/ChangeLog:

PR rtl-optimization/115013
* lra-constraints.cc (process_alt_operands): Update all_used_nregs
only for winreg.  Ignore reg starvation for small reg classes.

Diff:
---
 gcc/lra-constraints.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index e945a4da4519..92b343fa99a0 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2674,8 +2674,9 @@ process_alt_operands (int only_alternative)
  if (early_clobber_p
  || curr_static_id->operand[nop].type != OP_OUT)
{
- all_used_nregs
-   += ira_reg_class_min_nregs[this_alternative][mode];
+ if (winreg)
+   all_used_nregs
+ += ira_reg_class_min_nregs[this_alternative][mode];
  all_this_alternative
= (reg_class_subunion
   [all_this_alternative][this_alternative]);
@@ -3250,6 +3251,7 @@ process_alt_operands (int only_alternative)
  overall += LRA_MAX_REJECT;
}
   if (all_this_alternative != NO_REGS
+ && !SMALL_REGISTER_CLASS_P (all_this_alternative)
  && all_used_nregs != 0 && all_reload_nregs != 0
  && (all_used_nregs + all_reload_nregs + 1
  >= ira_class_hard_regs_num[all_this_alternative]))


[gcc r14-9401] [PR113790][LRA]: Fixing LRA ICE on riscv64

2024-03-08 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:cebbaa2a84586a7345837f74a53b7a0263bf29ee

commit r14-9401-gcebbaa2a84586a7345837f74a53b7a0263bf29ee
Author: Vladimir N. Makarov 
Date:   Fri Mar 8 14:48:33 2024 -0500

[PR113790][LRA]: Fixing LRA ICE on riscv64

  LRA failed to consider all insn alternatives when non-reload pseudo
did not get a hard register.  This resulted in failure to generate
code by LRA.  The patch fixes this problem.

gcc/ChangeLog:

PR target/113790
* lra-assigns.cc (assign_by_spills): Set up all_spilled_pseudos
for non-reload pseudo too.

Diff:
---
 gcc/lra-assigns.cc | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index d1b2b35ffc9..7dfa6f70941 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1430,13 +1430,19 @@ assign_by_spills (void)
hard_regno = spill_for (regno, &all_spilled_pseudos, iter == 1);
  if (hard_regno < 0)
{
- if (reload_p) {
-   /* Put unassigned reload pseudo first in the
-  array.  */
-   regno2 = sorted_pseudos[nfails];
-   sorted_pseudos[nfails++] = regno;
-   sorted_pseudos[i] = regno2;
- }
+ if (reload_p)
+   {
+ /* Put unassigned reload pseudo first in the array.  */
+ regno2 = sorted_pseudos[nfails];
+ sorted_pseudos[nfails++] = regno;
+ sorted_pseudos[i] = regno2;
+   }
+ else
+   {
+ /* Consider all alternatives on the next constraint
+subpass.  */
+ bitmap_set_bit (&all_spilled_pseudos, regno);
+   }
}
  else
{


[gcc r14-9557] [PR99829][LRA]: Fixing LRA ICE on arm

2024-03-19 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:9c91f8a88b2db50c8faf70786d3cef27b39ac9fc

commit r14-9557-g9c91f8a88b2db50c8faf70786d3cef27b39ac9fc
Author: Vladimir N. Makarov 
Date:   Tue Mar 19 16:57:11 2024 -0400

[PR99829][LRA]: Fixing LRA ICE on arm

  LRA removed insn setting equivalence to memory whose output was
reloaded. This resulted in writing an uninitialized value to the memory
which triggered assert in LRA code checking the final generated code.
This patch fixes the problem.  Comment in the patch contains more
details about the problem and its solution.

gcc/ChangeLog:

PR target/99829
* lra-constraints.cc (lra_constraints): Prevent removing insn
with reverse equivalence to memory if the memory was reloaded.

Diff:
---
 gcc/lra-constraints.cc | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 0ae81c1ff9c..10e3d4e4097 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5213,7 +5213,7 @@ lra_constraints (bool first_p)
   bool changed_p;
   int i, hard_regno, new_insns_num;
   unsigned int min_len, new_min_len, uid;
-  rtx set, x, reg, dest_reg;
+  rtx set, x, reg, nosubreg_dest;
   rtx_insn *original_insn;
   basic_block last_bb;
   bitmap_iterator bi;
@@ -5377,14 +5377,14 @@ lra_constraints (bool first_p)
{
  if ((set = single_set (curr_insn)) != NULL_RTX)
{
- dest_reg = SET_DEST (set);
+ nosubreg_dest = SET_DEST (set);
  /* The equivalence pseudo could be set up as SUBREG in a
 case when it is a call restore insn in a mode
 different from the pseudo mode.  */
- if (GET_CODE (dest_reg) == SUBREG)
-   dest_reg = SUBREG_REG (dest_reg);
- if ((REG_P (dest_reg)
-  && (x = get_equiv (dest_reg)) != dest_reg
+ if (GET_CODE (nosubreg_dest) == SUBREG)
+   nosubreg_dest = SUBREG_REG (nosubreg_dest);
+ if ((REG_P (nosubreg_dest)
+  && (x = get_equiv (nosubreg_dest)) != nosubreg_dest
   /* Remove insns which set up a pseudo whose value
  cannot be changed.  Such insns might be not in
  init_insns because we don't update equiv data
@@ -5403,11 +5403,21 @@ lra_constraints (bool first_p)
  up the equivalence.  */
   || in_list_p (curr_insn,
 ira_reg_equiv
-[REGNO (dest_reg)].init_insns)))
+[REGNO (nosubreg_dest)].init_insns)))
  || (((x = get_equiv (SET_SRC (set))) != SET_SRC (set))
  && in_list_p (curr_insn,
ira_reg_equiv
-   [REGNO (SET_SRC (set))].init_insns)))
+   [REGNO (SET_SRC (set))].init_insns)
+ /* This is a reverse equivalence to memory (see ira.cc)
+in store insn.  We can reload all the destination and
+have an output reload which is a store to memory.  If
+we just remove the insn, we will have the output
+reload storing an undefined value to the memory.
+Check that we did not reload the memory to prevent a
+wrong code generation.  We could implement using the
+equivalence still in such case but doing this is not
+worth the efforts as such case is very rare.  */
+ && MEM_P (nosubreg_dest)))
{
  /* This is equiv init insn of pseudo which did not get a
 hard register -- remove the insn.  */


[gcc r14-9793] [PR114415][scheduler]: Fixing wrong code generation

2024-04-04 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:a24476422ba311b83737cf8bdc5892a7fc7514eb

commit r14-9793-ga24476422ba311b83737cf8bdc5892a7fc7514eb
Author: Vladimir N. Makarov 
Date:   Thu Apr 4 16:04:04 2024 -0400

[PR114415][scheduler]: Fixing wrong code generation

  For the test case, the insn scheduler (working for live range
shrinkage) moves insns modifying stack memory before an insn reserving
the stack memory. Comments in the patch contain more details about
the problem and its solution.

gcc/ChangeLog:

PR rtl-optimization/114415
* sched-deps.cc (add_insn_mem_dependence): Add memory check for mem 
argument.
(sched_analyze_1): Treat stack pointer modification as memory read.
(sched_analyze_2, sched_analyze_insn): Add memory guard for 
processing pending_read_mems.
* sched-int.h (deps_desc): Add comment to pending_read_mems.

gcc/testsuite/ChangeLog:

PR rtl-optimization/114415
* gcc.target/i386/pr114415.c: New test.

Diff:
---
 gcc/sched-deps.cc| 49 +---
 gcc/sched-int.h  |  4 ++-
 gcc/testsuite/gcc.target/i386/pr114415.c | 47 ++
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/gcc/sched-deps.cc b/gcc/sched-deps.cc
index 5034e664e5e..4c668245049 100644
--- a/gcc/sched-deps.cc
+++ b/gcc/sched-deps.cc
@@ -1735,7 +1735,7 @@ add_insn_mem_dependence (class deps_desc *deps, bool 
read_p,
   insn_node = alloc_INSN_LIST (insn, *insn_list);
   *insn_list = insn_node;
 
-  if (sched_deps_info->use_cselib)
+  if (sched_deps_info->use_cselib && MEM_P (mem))
 {
   mem = shallow_copy_rtx (mem);
   XEXP (mem, 0) = cselib_subst_to_values_from_insn (XEXP (mem, 0),
@@ -2458,6 +2458,25 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
   FIRST_STACK_REG);
}
 #endif
+  if (!deps->readonly && regno == STACK_POINTER_REGNUM)
+   {
+ /* Please see PR114415.  We have insn modifying memory on the stack
+and not addressed by stack pointer and we have insn reserving the
+stack space.  If we move the insn modifying memory before insn
+reserving the stack space, we can change memory out of the red
+zone.  Even worse, some optimizations (e.g. peephole) can add
+insns using temporary stack slots before insn reserving the stack
+space but after the insn modifying memory.  This will corrupt the
+modified memory.  Therefore we treat insn changing the stack as
+reading unknown memory.  This will create anti-dependence.  We
+don't need to treat the insn as writing memory because GCC by
+itself does not generate code reading undefined stack memory.  */
+ if ((deps->pending_read_list_length + deps->pending_write_list_length)
+ >= param_max_pending_list_length
+ && !DEBUG_INSN_P (insn))
+   flush_pending_lists (deps, insn, true, true);
+ add_insn_mem_dependence (deps, true, insn, dest);
+   }
 }
   else if (MEM_P (dest))
 {
@@ -2498,10 +2517,11 @@ sched_analyze_1 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
  pending_mem = deps->pending_read_mems;
  while (pending)
{
- if (anti_dependence (pending_mem->element (), t)
- && ! sched_insns_conditions_mutex_p (insn, pending->insn ()))
-   note_mem_dep (t, pending_mem->element (), pending->insn (),
- DEP_ANTI);
+ rtx mem = pending_mem->element ();
+ if (REG_P (mem)
+ || (anti_dependence (mem, t)
+ && ! sched_insns_conditions_mutex_p (insn, pending->insn 
())))
+   note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
  pending = pending->next ();
  pending_mem = pending_mem->next ();
@@ -2637,12 +2657,10 @@ sched_analyze_2 (class deps_desc *deps, rtx x, rtx_insn 
*insn)
pending_mem = deps->pending_read_mems;
while (pending)
  {
-   if (read_dependence (pending_mem->element (), t)
-   && ! sched_insns_conditions_mutex_p (insn,
-pending->insn ()))
- note_mem_dep (t, pending_mem->element (),
-   pending->insn (),
-   DEP_ANTI);
+   rtx mem = pending_mem->element ();
+   if (MEM_P (mem) && read_dependence (mem, t)
+   && ! sched_insns_conditions_mutex_p (insn, pending->insn 
()))
+ note_mem_dep (t, mem, pending->insn (), DEP_ANTI);
 
pending = pending->next ();
pending_mem = pending_mem->next ();
@@ -3026,8 +3044,7 @@ sched_analyze_insn (class deps_desc

[gcc r15-5658] [PR117105][LRA]: Use unique value reload pseudo for early clobber operand

2024-11-25 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:4b09e2c67ef593db171b0755b46378964421782b

commit r15-5658-g4b09e2c67ef593db171b0755b46378964421782b
Author: Vladimir N. Makarov 
Date:   Mon Nov 25 16:09:00 2024 -0500

[PR117105][LRA]: Use unique value reload pseudo for early clobber operand

LRA did not generate insn satisfying insn constraints on the PR
test.  The reason for this is that LRA assigned the same hard reg for
two conflicting reload pseudos.  The two insn reload pseudos are
originated from the same pseudo and LRA tried to optimize as it
assigned the same value for the reload pseudos.  It is an LRA
optimization to minimize reload insns.  The two reload pseudos
conflict as one of them is an early clobber insn operand.  The patch
solves this problem by assigning a unique value if the operand is an early
clobber one.

gcc/ChangeLog:

PR target/117105
* lra-constraints.cc (get_reload_reg): Create unique value reload
pseudos for early clobbered operands.

gcc/testsuite/ChangeLog:

PR target/117105
* gcc.target/i386/pr117105.c: New test.

Diff:
---
 gcc/lra-constraints.cc   |  3 ++-
 gcc/testsuite/gcc.target/i386/pr117105.c | 15 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 61bbb930b7f4..052e5f71f1e1 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -663,7 +663,6 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
 {
   int i, regno;
   enum reg_class new_class;
-  bool unique_p = false;
 
   if (type == OP_OUT)
 {
@@ -702,6 +701,8 @@ get_reload_reg (enum op_type type, machine_mode mode, rtx 
original,
exclude_start_hard_regs, title);
   return true;
 }
+
+  bool unique_p = early_clobber_p;
   /* Prevent reuse value of expression with side effects,
  e.g. volatile memory.  */
   if (! side_effects_p (original))
diff --git a/gcc/testsuite/gcc.target/i386/pr117105.c 
b/gcc/testsuite/gcc.target/i386/pr117105.c
new file mode 100644
index ..252bb138c9c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117105.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-code-hoisting -fno-tree-fre -fno-tree-dominator-opts 
-fno-tree-pre -fno-tree-sra" } */
+int a;
+struct b {
+  char c;
+  char d;
+};
+int main() {
+  struct b e;
+  int f;
+  while (a)
+if (f == e.d)
+  f = e.c = e.d & 1 >> e.d;
+  return 0;
+}


[gcc r15-5802] [PR117770][LRA]: Check hard regs corresponding insn operands for hard reg clobbers

2024-11-29 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:e79583cef924f5fb5de551bd61da7b5fdee5c690

commit r15-5802-ge79583cef924f5fb5de551bd61da7b5fdee5c690
Author: Vladimir N. Makarov 
Date:   Fri Nov 29 14:58:47 2024 -0500

[PR117770][LRA]: Check hard regs corresponding insn operands for hard reg 
clobbers

When LRA processes early clobbered hard regs explicitly present in the
insn description, it checks that the hard reg is also used as input.
If the hard reg is not an input also, it is marked as dying.  For the
check LRA processed only input hard reg also explicitly present in the
insn description.  For given PR, the hard reg is used as input as the
operand and is not present explicitly in the insn description and
therefore LRA marked the hard reg as dying.  This results in wrong
allocation and wrong code.  The patch solves the problem by processing
hard regs used as the insn operand.

gcc/ChangeLog:

PR rtl-optimization/117770
* lra-lives.cc: Include ira-int.h.
(process_bb_lives): Check hard regs corresponding insn operands
for dying hard wired reg clobbers.

Diff:
---
 gcc/lra-lives.cc | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc
index 66c6577e5d60..49134ade713d 100644
--- a/gcc/lra-lives.cc
+++ b/gcc/lra-lives.cc
@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3.If not see
 #include "insn-config.h"
 #include "regs.h"
 #include "ira.h"
+#include "ira-int.h"
 #include "recog.h"
 #include "cfganal.h"
 #include "sparseset.h"
@@ -990,6 +991,19 @@ process_bb_lives (basic_block bb, int &curr_point, bool 
dead_insn_p)
for (reg2 = curr_static_id->hard_regs; reg2 != NULL; reg2 = 
reg2->next)
  if (reg2->type != OP_OUT && reg2->regno == reg->regno)
break;
+   if (reg2 != NULL)
+ continue;
+
+   HARD_REG_SET clobbered_regset;
+   CLEAR_HARD_REG_SET (clobbered_regset);
+   SET_HARD_REG_BIT (clobbered_regset, reg->regno);
+
+   for (reg2 = curr_id->regs; reg2 != NULL; reg2 = reg2->next)
+ if (reg2->type != OP_OUT && reg2->regno < FIRST_PSEUDO_REGISTER
+ && ira_hard_reg_set_intersection_p (reg2->regno,
+ reg2->biggest_mode,
+ clobbered_regset))
+   break;
if (reg2 == NULL)
  make_hard_regno_dead (reg->regno);
  }


[gcc r15-5529] [PR116587][LRA]: Fix last chance reload pseudo allocation

2024-11-20 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:56fc6a6d9edc9f9170285ef31c7f312608fad88c

commit r15-5529-g56fc6a6d9edc9f9170285ef31c7f312608fad88c
Author: Vladimir N. Makarov 
Date:   Wed Nov 20 14:25:41 2024 -0500

[PR116587][LRA]: Fix last chance reload pseudo allocation

On i686 PR116587 test compilation resulted in LRA failure to find
registers for a reload insn pseudo.  The insn requires 6 regs for 4
reload insn pseudos where two of them require 2 regs each.  But we
have only 5 free regs as sp is a fixed reg, bp is fixed because of
-fno-omit-frame-pointer, bx is assigned to pic_offset_table_pseudo
because of -fPIC.  LRA spills pic_offset_table_pseudo as the last
chance approach to allocate registers to the reload pseudo.  Although
it makes 2 free registers for the unallocated reload pseudo requiring
also 2 regs, the pseudo still can not be allocated as the 2 free regs
are disjoint.  The patch spills all pseudos conflicting with the
unallocated reload pseudo including already allocated reload insn
pseudos, then standard LRA code allocates spilled pseudos requiring
more than one register first and avoids the situation of disjoint regs for
reload pseudos requiring more than one reg.

gcc/ChangeLog:

PR target/116587
* lra-assigns.cc (find_all_spills_for): Consider all pseudos whose
classes intersect given pseudo class.

gcc/testsuite/ChangeLog:

PR target/116587
* gcc.target/i386/pr116587.c: New test.

Diff:
---
 gcc/lra-assigns.cc   |  9 +
 gcc/testsuite/gcc.target/i386/pr116587.c | 27 +++
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index bcd7967ec7d9..0a14bde5e743 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1362,14 +1362,7 @@ find_all_spills_for (int regno)
{
  if (live_pseudos_reg_renumber[r2->regno] >= 0
  && ! sparseset_bit_p (live_range_hard_reg_pseudos, r2->regno)
- && rclass_intersect_p[regno_allocno_class_array[r2->regno]]
- && ((int) r2->regno < lra_constraint_new_regno_start
- || bitmap_bit_p (&lra_inheritance_pseudos, r2->regno)
- || bitmap_bit_p (&lra_split_regs, r2->regno)
- || bitmap_bit_p (&lra_optional_reload_pseudos, r2->regno)
- /* There is no sense to consider another reload
-pseudo if it has the same class.  */
- || regno_allocno_class_array[r2->regno] != rclass))
+ && rclass_intersect_p[regno_allocno_class_array[r2->regno]])
sparseset_set_bit (live_range_hard_reg_pseudos, r2->regno);
}
}
diff --git a/gcc/testsuite/gcc.target/i386/pr116587.c 
b/gcc/testsuite/gcc.target/i386/pr116587.c
new file mode 100644
index ..092830002d2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116587.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -mstackrealign -mavx512f 
-fharden-control-flow-redundancy -fno-omit-frame-pointer -mbmi 
-fkeep-gc-roots-live" } */
+
+typedef __UINT64_TYPE__ a;
+int b;
+struct c {
+  a d;
+};
+extern char e[];
+int f;
+void g();
+char *h(struct c *i, a d) {
+  while (b) {
+if ((i->d & d) == i->d) {
+  if (f)
+g();
+  g();
+  d &= ~i->d;
+}
+++i;
+  }
+  if (d)
+g();
+  if (f)
+return "";
+  return e;
+}


[gcc r15-7379] [PR115568][LRA]: Use more strict output reload check in rematerialization

2025-02-05 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:98545441308c2ae4d535f14b108ad6551fd927d5

commit r15-7379-g98545441308c2ae4d535f14b108ad6551fd927d5
Author: Vladimir N. Makarov 
Date:   Wed Feb 5 14:23:23 2025 -0500

[PR115568][LRA]: Use more strict output reload check in rematerialization

  In this PR case LRA rematerialized a value from inheritance insn
instead of output reload one.  This resulted in considering a
rematerialization candidate value available when it was actually
not.  As a consequence an insn after rematerialization used the
unexpected value and this use resulted in fp exception.  The patch
fixes this bug.

gcc/ChangeLog:

PR rtl-optimization/115568
* lra-remat.cc (create_cands): Check that output reload insn is
adjacent to given insn.  Update a comment.

gcc/testsuite/ChangeLog:

PR rtl-optimization/115568
* gcc.target/i386/pr115568.c: New.

Diff:
---
 gcc/lra-remat.cc | 10 +
 gcc/testsuite/gcc.target/i386/pr115568.c | 38 
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/gcc/lra-remat.cc b/gcc/lra-remat.cc
index bb13c616a740..2f3afffcf5be 100644
--- a/gcc/lra-remat.cc
+++ b/gcc/lra-remat.cc
@@ -459,7 +459,8 @@ create_cands (void)
if (insn2 != NULL
&& dst_regno >= FIRST_PSEUDO_REGISTER
&& reg_renumber[dst_regno] < 0
-   && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
+   && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn)
+   && insn2 == prev_nonnote_insn (insn))
  {
create_cand (insn2, regno_potential_cand[src_regno].nop,
 dst_regno, insn);
@@ -473,9 +474,10 @@ create_cands (void)
gcc_assert (REG_P (*id->operand_loc[nop]));
int regno = REGNO (*id->operand_loc[nop]);
gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
-   /* If we're setting an unrenumbered pseudo, make a candidate 
immediately.
-  If it's an output reload register, save it for later; the code 
above
-  looks for output reload insns later on.  */
+   /* If we're setting an unrenumbered pseudo, make a candidate
+  immediately.  If it's a potential output reload register, save
+  it for later; the code above looks for output reload insns later
+  on.  */
if (reg_renumber[regno] < 0)
  create_cand (insn, nop, regno);
else if (regno >= lra_constraint_new_regno_start)
diff --git a/gcc/testsuite/gcc.target/i386/pr115568.c 
b/gcc/testsuite/gcc.target/i386/pr115568.c
new file mode 100644
index ..cedc7ac3843d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115568.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-sink -fno-tree-ter -fschedule-insns" } */
+
+int a, c, d = 1, e, f = 1, h, i, j;
+unsigned b = 1, g;
+int main() {
+  for (; h < 2; h++) {
+int k = ~(b || 0), l = ((~e - j) ^ a % b) % k, m = (b ^ -1) + e;
+unsigned o = ~a % ~1;
+if (f) {
+  l = d;
+  m = 10;
+  i = e;
+  d = -(~e + b);
+  g = o % m;
+  e = -1;
+n:
+  a = a % ~i;
+  b = ~k;
+  if (!g) {
+b = e + o % -1;
+continue;
+  }
+  if (!l)
+break;
+}
+int q = (~d + g) << ~e, p = (~d - q) & a >> b;
+unsigned s = ~((g & e) + (p | (b ^ (d + k;
+int r = (e & s) + p, u = d | ~a,
+t = ((~(q + (~a + (s + e & u) | (-g & (c << d ^ p));
+if (t)
+  if (!r)
+goto n;
+g = m;
+e = i;
+  }
+  return 0;
+}


[gcc r15-7305] [PR116234][LRA]: Check debug insn when looking at one insn pseudo occurrence

2025-01-31 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:decc6c0d4d909ce510b6533c48d70d0b353f909a

commit r15-7305-gdecc6c0d4d909ce510b6533c48d70d0b353f909a
Author: Vladimir N. Makarov 
Date:   Fri Jan 31 09:39:45 2025 -0500

[PR116234][LRA]: Check debug insn when looking at one insn pseudo occurrence

  LRA can change the reg class of a pseudo to NO_REGS when the pseudo is
referenced in only one insn.  The check of the references did not take
into account that a referring insn can be a debug insn.  This resulted
in different code generation with and without debug info generation.
The patch fixes this pitfall.

gcc/ChangeLog:

PR rtl-optimization/116234
* lra-constraints.cc (multiple_insn_refs_p): New function.
(curr_insn_transform): Use it.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116234
* gfortran.target/aarch64/aarch64.exp: New.
* gfortran.target/aarch64/pr116234.f: New.

Diff:
---
 gcc/lra-constraints.cc| 20 +-
 gcc/testsuite/gfortran.target/aarch64/aarch64.exp | 45 +
 gcc/testsuite/gfortran.target/aarch64/pr116234.f  | 80 +++
 3 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index ee3fd7a503aa..0659aed94c7d 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4144,6 +4144,24 @@ static bool invalid_mode_reg_p (enum machine_mode mode, 
rtx x)
  ira_prohibited_class_mode_regs[rclass][mode]));
 }
 
+/* Return TRUE if regno is referenced in more than one non-debug insn.  */
+static bool
+multiple_insn_refs_p (int regno)
+{
+  unsigned int uid;
+  bitmap_iterator bi;
+  int nrefs = 0;
+  EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi)
+{
+  if (!NONDEBUG_INSN_P (lra_insn_recog_data[uid]->insn))
+   continue;
+  if (nrefs == 1)
+   return true;
+  nrefs++;
+}
+  return false;
+}
+
 /* Main entry point of the constraint code: search the body of the
current insn to choose the best alternative.  It is mimicking insn
alternative cost calculation model of former reload pass.  That is
@@ -4602,7 +4620,7 @@ curr_insn_transform (bool check_only_p)
 registers for other pseudos referenced in the insn.  The most
 common case of this is a scratch register which will be
 transformed to scratch back at the end of LRA.  */
- && bitmap_single_bit_set_p (&lra_reg_info[regno].insn_bitmap))
+ && !multiple_insn_refs_p (regno))
{
  if (lra_get_allocno_class (regno) != NO_REGS)
lra_change_class (regno, NO_REGS, "  Change to", true);
diff --git a/gcc/testsuite/gfortran.target/aarch64/aarch64.exp 
b/gcc/testsuite/gfortran.target/aarch64/aarch64.exp
new file mode 100644
index ..79afc6993187
--- /dev/null
+++ b/gcc/testsuite/gfortran.target/aarch64/aarch64.exp
@@ -0,0 +1,45 @@
+#   Copyright (C) 2025 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't a aarch64 target.
+if { ![istarget aarch64*-*-*] } then {
+  return
+}
+
+# Make sure there is a fortran compiler to test.
+if { ![check_no_compiler_messages fortran_available assembly {
+! Fortran
+program P
+  stop
+end program P
+} ""] } {
+return
+}
+
+# Load support procs.
+load_lib gfortran-dg.exp
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+gfortran-dg-runtest [lsort \
+   [glob -nocomplain $srcdir/$subdir/*.\[fF\]{,90,95,03,08} ] ] "" ""
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gfortran.target/aarch64/pr116234.f 
b/gcc/testsuite/gfortran.target/aarch64/pr116234.f
new file mode 100644
index ..78b49bc86f10
--- /dev/null
+++ b/gcc/testsuite/gfortran.target/aarch64/pr116234.f
@@ -0,0 +1,80 @@
+! { dg-do compile }
+! { dg-options "-fcompare-debug -mcpu=phecda -O2 -funroll-all-loops -c 
-fno-rename-registers -fno-ivopts" }
+
+  SUBROUTINE FOO(UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX,
+ $ EQUED)
+
+  CHARACTER  EQUED, UPLO
+  INTEGERIA, JA
+  DOUBLE PRECISION   AMAX, SCOND
+
+  INTEGERDESCA(*)
+  DOUBLE PRECISION   A(*), SR(*)
+
+  INTEGERIACOL, IAROW, IC, IIA, I

[gcc r15-6122] [PR116778][LRA]: Check pseudos assigned to FP after rematerialization to build live ranges

2024-12-11 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:fca0ab08cd936464b152e9b45356f625eba27575

commit r15-6122-gfca0ab08cd936464b152e9b45356f625eba27575
Author: Vladimir N. Makarov 
Date:   Wed Dec 11 15:36:21 2024 -0500

[PR116778][LRA]: Check pseudos assigned to FP after rematerialization to 
build live ranges

This is a better fix of the PR, one that allows us to avoid building
live ranges after rematerialization.  It checks that FP cannot be
eliminated now and that pseudos assigned to FP will be spilled.  In
this case we need to build live ranges after rematerialization for
correct assignment of stack slots to the spilled pseudos involved in
rematerialization.

gcc/ChangeLog:

PR rtl-optimization/116778
* ira-int.h (x_ira_class_hard_reg_index): Fix comment typo.
* lra-eliminations.cc (lra_fp_pseudo_p): New function.
* lra-int.h (lra_fp_pseudo_p): External declaration.
* lra-spills.cc (lra_need_for_spills_p): Fix formatting.
* lra.cc (lra): Use lra_fp_pseudo_p in lra_create_live_range after
lra_remat.

Diff:
---
 gcc/ira-int.h   |  2 +-
 gcc/lra-eliminations.cc | 19 +++
 gcc/lra-int.h   |  1 +
 gcc/lra-spills.cc   |  3 ++-
 gcc/lra.cc  |  7 +--
 5 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index 8c3c5941de54..5ce930b6d22a 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -868,7 +868,7 @@ public:
 
   /* Index (in ira_class_hard_regs; for given register class and hard
  register (in general case a hard register can belong to several
- register classes;.  The index is negative for hard registers
+ register classes).  The index is negative for hard registers
  unavailable for the allocation.  */
   short x_ira_class_hard_reg_index[N_REG_CLASSES][FIRST_PSEUDO_REGISTER];
 
diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc
index 96772f2904a6..5343d8c5102d 100644
--- a/gcc/lra-eliminations.cc
+++ b/gcc/lra-eliminations.cc
@@ -1428,6 +1428,25 @@ lra_update_fp2sp_elimination (int *spilled_pseudos)
   return n;
 }
 
+/* Return true if we have a pseudo assigned to hard frame pointer.  */
+bool
+lra_fp_pseudo_p (void)
+{
+  HARD_REG_SET set;
+
+  if (frame_pointer_needed)
+/* At this stage it means we have no pseudos assigned to FP:  */
+return false;
+  CLEAR_HARD_REG_SET (set);
+  add_to_hard_reg_set (&set, Pmode, HARD_FRAME_POINTER_REGNUM);
+  for (int i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
+if (lra_reg_info[i].nrefs != 0 && reg_renumber[i] >= 0
+   && overlaps_hard_reg_set_p (set, PSEUDO_REGNO_MODE (i),
+   reg_renumber[i]))
+  return true;
+  return false;
+}
+
 /* Entry function to do final elimination if FINAL_P or to update
elimination register offsets (FIRST_P if we are doing it the first
time).  */
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 5f605c3ae410..abee008e6423 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -419,6 +419,7 @@ extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, 
machine_mode,
 bool, bool, poly_int64, bool);
 extern void eliminate_regs_in_insn (rtx_insn *insn, bool, bool, poly_int64);
 extern int lra_update_fp2sp_elimination (int *spilled_pseudos);
+extern bool lra_fp_pseudo_p (void);
 extern void lra_eliminate (bool, bool);
 
 extern poly_int64 lra_update_sp_offset (rtx, poly_int64);
diff --git a/gcc/lra-spills.cc b/gcc/lra-spills.cc
index 3f5c8d2bcb00..6e9a8c3e34e3 100644
--- a/gcc/lra-spills.cc
+++ b/gcc/lra-spills.cc
@@ -594,8 +594,9 @@ lra_need_for_scratch_reg_p (void)
 bool
 lra_need_for_spills_p (void)
 {
-  int i; max_regno = max_reg_num ();
+  int i;
 
+  max_regno = max_reg_num ();
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
 if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
&& ! ira_former_scratch_p (i))
diff --git a/gcc/lra.cc b/gcc/lra.cc
index 6b740ed23252..55737deba3f1 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -2555,8 +2555,11 @@ lra (FILE *f, int verbose)
 rematerialize them first.  */
   if (lra_remat ())
{
- /* We need full live info -- see the comment above.  */
- lra_create_live_ranges (true, true);
+ /* We need full live info -- see the comment above.  We also might
+need live info if we have a pseudo assigned to hard frame pointer
+reg and will need FP for usual purposes.  */
+ lra_create_live_ranges (lra_reg_spill_p || lra_fp_pseudo_p (),
+ true);
  live_p = true;
  if (! lra_need_for_spills_p ())
{


[gcc r15-6091] [PR117946][LRA]: When assigning hard reg use biggest mode to check ira_prohibited_class_mode_regs

2024-12-10 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:6fc3da8fa2af1d4ee154ea803636eabde358b553

commit r15-6091-g6fc3da8fa2af1d4ee154ea803636eabde358b553
Author: Vladimir N. Makarov 
Date:   Tue Dec 10 12:50:27 2024 -0500

[PR117946][LRA]: When assigning hard reg use biggest mode to check 
ira_prohibited_class_mode_regs

A pseudo in the PR test case gets hard reg 43 which is x86 r15 (the xmm
regs follow r15).  The pseudo is of INT_SSE_CLASS and SImode but is
used in TImode as a paradoxical subreg.  r15 in TImode is wrong and does
not satisfy constraint 'r'.  Therefore LRA creates moves involving the
pseudo in TImode until the limit of reload insns is reached.
Unfortunately x86 hard_regno_mode_ok (like some hooks for other targets)
says that it is ok to use r15 for a TImode pseudo.  Therefore LRA uses
ira_prohibited_class_mode_regs for such cases, but it was checked
against the native pseudo mode.  The patch fixes it by using the biggest
pseudo mode.

gcc/ChangeLog:

PR rtl-optimization/117946
* lra-assigns.cc: (find_hard_regno_for_1): Use the biggest mode to
check ira_prohibited_class_mode_regs.

gcc/testsuite/ChangeLog:

PR rtl-optimization/117946
* gcc.target/i386/pr117946.c: New.

Diff:
---
 gcc/lra-assigns.cc   |  5 ++---
 gcc/testsuite/gcc.target/i386/pr117946.c | 16 
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index 0a14bde5e743..405afc06f57e 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -629,13 +629,12 @@ find_hard_regno_for_1 (int regno, int *cost, int 
try_only_hard_regno,
hard_regno = ira_class_hard_regs[rclass][i];
   if (! overlaps_hard_reg_set_p (conflict_set,
 PSEUDO_REGNO_MODE (regno), hard_regno)
- && targetm.hard_regno_mode_ok (hard_regno,
-PSEUDO_REGNO_MODE (regno))
+ && targetm.hard_regno_mode_ok (hard_regno, PSEUDO_REGNO_MODE (regno))
  /* We cannot use prohibited_class_mode_regs for all classes
 because it is not defined for all classes.  */
  && (ira_allocno_class_translate[rclass] != rclass
  || ! TEST_HARD_REG_BIT (ira_prohibited_class_mode_regs
- [rclass][PSEUDO_REGNO_MODE (regno)],
+ [rclass][biggest_mode],
  hard_regno))
  && ! TEST_HARD_REG_BIT (impossible_start_hard_regs, hard_regno)
  && (nregs_diff == 0
diff --git a/gcc/testsuite/gcc.target/i386/pr117946.c 
b/gcc/testsuite/gcc.target/i386/pr117946.c
new file mode 100644
index ..7304e01d1a7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117946.c
@@ -0,0 +1,16 @@
+/* { dg-do compile  { target { ! ia32 } } } */
+/* { dg-options "-O -favoid-store-forwarding -mavx10.1 -mprefer-avx128 
--param=store-forwarding-max-distance=128 -Wno-psabi" } */
+typedef __attribute__((__vector_size__ (64))) _Decimal32 V;
+
+void
+bar (float, float, float, float, float, _Complex, float, float, float,
+ _BitInt(1023), _BitInt (1023), float, float, float, float, float, float,
+ float, float, float, float, float, float, _Decimal64, float, float, float,
+ V, float, _Decimal64);
+
+void
+foo ()
+{
+  bar (0, 0, 0, 0, 0, 0, 0, __builtin_nand64 ("nan"), 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (V){}, 0, 0);
+}


[gcc r15-6349] Revert "[PR117248][LRA]: Rewriting reg notes update and fix calculation of conflict hard regs of pse

2024-12-18 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:1a28ff1c01c290d50fb4ebd6e6a49482195cab9c

commit r15-6349-g1a28ff1c01c290d50fb4ebd6e6a49482195cab9c
Author: Vladimir N. Makarov 
Date:   Wed Dec 18 13:28:43 2024 -0500

Revert "[PR117248][LRA]: Rewriting reg notes update and fix calculation of 
conflict hard regs of pseudo."

This reverts commit 75e7d1600f47859df40b2ac0feff5a71e0dbb040.

Diff:
---
 gcc/lra-lives.cc | 284 +--
 1 file changed, 107 insertions(+), 177 deletions(-)

diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc
index 6286c5ad5a83..510f7d927ab0 100644
--- a/gcc/lra-lives.cc
+++ b/gcc/lra-lives.cc
@@ -84,10 +84,10 @@ static sparseset pseudos_live_through_setjumps;
 /* Set of hard regs (except eliminable ones) currently live.  */
 static HARD_REG_SET hard_regs_live;
 
-/* Set of pseudos and hard registers in the current insn, only out/inout ones,
-   and the current insn pseudos and hard registers living right after the
-   insn.  */
-static sparseset insn_regnos, out_insn_regnos, insn_regnos_live_after;
+/* Set of pseudos and hard registers start living/dying in the current
+   insn.  These sets are used to update REG_DEAD and REG_UNUSED notes
+   in the insn.  */
+static sparseset start_living, start_dying;
 
 /* Set of pseudos and hard regs dead and unused in the current
insn.  */
@@ -228,6 +228,17 @@ enum point_type {
   USE_POINT
 };
 
+/* Return TRUE if set A contains a pseudo register, otherwise, return FALSE.  
*/
+static bool
+sparseset_contains_pseudos_p (sparseset a)
+{
+  int regno;
+  EXECUTE_IF_SET_IN_SPARSESET (a, regno)
+if (!HARD_REGISTER_NUM_P (regno))
+  return true;
+  return false;
+}
+
 /* Mark pseudo REGNO as living or dying at program point POINT, depending on
whether TYPE is a definition or a use.  If this is the first reference to
REGNO that we've encountered, then create a new live range for it.  */
@@ -266,29 +277,29 @@ update_pseudo_point (int regno, int point, enum 
point_type type)
 /* The corresponding bitmaps of BB currently being processed.  */
 static bitmap bb_killed_pseudos, bb_gen_pseudos;
 
-/* Record hard register REGNO as now being live.  Return true if REGNO liveness
-   changes.  */
-static bool
+/* Record hard register REGNO as now being live.  It updates
+   living hard regs and START_LIVING.  */
+static void
 make_hard_regno_live (int regno)
 {
   lra_assert (HARD_REGISTER_NUM_P (regno));
   if (TEST_HARD_REG_BIT (hard_regs_live, regno)
   || TEST_HARD_REG_BIT (eliminable_regset, regno))
-return false;
+return;
   SET_HARD_REG_BIT (hard_regs_live, regno);
+  sparseset_set_bit (start_living, regno);
   if (fixed_regs[regno] || TEST_HARD_REG_BIT (hard_regs_spilled_into, regno))
 bitmap_set_bit (bb_gen_pseudos, regno);
-  return true;
 }
 
-/* Process the definition of hard register REGNO.  This updates hard_regs_live
-   and conflict hard regs for living pseudos.  Return true if REGNO liveness
-   changes.  */
-static bool
+/* Process the definition of hard register REGNO.  This updates
+   hard_regs_live, START_DYING and conflict hard regs for living
+   pseudos.  */
+static void
 make_hard_regno_dead (int regno)
 {
   if (TEST_HARD_REG_BIT (eliminable_regset, regno))
-return false;
+return;
 
   lra_assert (HARD_REGISTER_NUM_P (regno));
   unsigned int i;
@@ -296,89 +307,79 @@ make_hard_regno_dead (int regno)
 SET_HARD_REG_BIT (lra_reg_info[i].conflict_hard_regs, regno);
 
   if (! TEST_HARD_REG_BIT (hard_regs_live, regno))
-return false;
+return;
   CLEAR_HARD_REG_BIT (hard_regs_live, regno);
+  sparseset_set_bit (start_dying, regno);
   if (fixed_regs[regno] || TEST_HARD_REG_BIT (hard_regs_spilled_into, regno))
 {
   bitmap_clear_bit (bb_gen_pseudos, regno);
   bitmap_set_bit (bb_killed_pseudos, regno);
 }
-  return true;
 }
 
-/* Mark pseudo REGNO as now being live.  Return true if REGNO liveness
-   changes.  */
-static bool
+/* Mark pseudo REGNO as now being live and update START_LIVING.  */
+static void
 mark_pseudo_live (int regno)
 {
   lra_assert (!HARD_REGISTER_NUM_P (regno));
   if (sparseset_bit_p (pseudos_live, regno))
-return false;
+return;
+
   sparseset_set_bit (pseudos_live, regno);
-  return true;
+  sparseset_set_bit (start_living, regno);
 }
 
-/* Mark pseudo REGNO as now being dead.  Return true if REGNO liveness
-   changes.  */
-static bool
+/* Mark pseudo REGNO as now being dead and update START_DYING.  */
+static void
 mark_pseudo_dead (int regno)
 {
   lra_assert (!HARD_REGISTER_NUM_P (regno));
   lra_reg_info[regno].conflict_hard_regs |= hard_regs_live;
   if (!sparseset_bit_p (pseudos_live, regno))
-return false;
+return;
 
   sparseset_clear_bit (pseudos_live, regno);
-  return true;
+  sparseset_set_bit (start_dying, regno);
 }
 
-/* Mark register REGNO (pseudo or hard register) in MODE as being live and
-   update BB_GEN_PSEUDOS.  Return true if REGNO liveness changes.  */
-static bool
+/* Mark re

[gcc r15-6351] [PR117248][LRA]: Fix calculation of conflict hard regs of pseudo

2024-12-18 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:24df430108c0cdf83d7cccd69367a977adca7da0

commit r15-6351-g24df430108c0cdf83d7cccd69367a977adca7da0
Author: Vladimir N. Makarov 
Date:   Wed Dec 18 14:00:38 2024 -0500

[PR117248][LRA]: Fix calculation of conflict hard regs of pseudo

The 1st patch for PR117248 resulted in PR117299 (libgo failures on arm).
So this is a patch solving the problem in another way.

gcc/ChangeLog:

PR rtl-optimization/117248
* lra-lives.cc (process_bb_lives): Update conflict hard regs even
when clobber hard reg is not marked as dead.

Diff:
---
 gcc/lra-lives.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc
index 510f7d927ab0..94cb000bc5a1 100644
--- a/gcc/lra-lives.cc
+++ b/gcc/lra-lives.cc
@@ -1006,7 +1006,15 @@ process_bb_lives (basic_block bb, int &curr_point, bool 
dead_insn_p)
  clobbered_regset))
break;
if (reg2 == NULL)
- make_hard_regno_dead (reg->regno);
+ {
+   make_hard_regno_dead (reg->regno);
+ }
+   else
+ {
+   EXECUTE_IF_SET_IN_SPARSESET (pseudos_live, j)
+ SET_HARD_REG_BIT (lra_reg_info[j].conflict_hard_regs,
+   reg->regno);
+ }
  }
 
   /* Increment the current program point if we must.  */


[gcc r15-5997] [PR117248][LRA]: Rewriting reg notes update and fix calculation of conflict hard regs of pseudo.

2024-12-06 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:75e7d1600f47859df40b2ac0feff5a71e0dbb040

commit r15-5997-g75e7d1600f47859df40b2ac0feff5a71e0dbb040
Author: Vladimir N. Makarov 
Date:   Fri Dec 6 16:16:28 2024 -0500

[PR117248][LRA]: Rewriting reg notes update and fix calculation of conflict 
hard regs of pseudo.

  LRA updates the conflict hard regs of a pseudo when some hard reg dies.
Complicated PA div/mod insns reference both clobbered explicit hard regs
and hard regs as operands.  This prevents some hard regs from dying
although they still conflict with pseudos living through the insn.  Also,
on such insns LRA wrongly updates reg notes (REG_DEAD, REG_UNUSED) which
are used later in the rematerialization subpass.  The patch fixes the
problems.

gcc/ChangeLog:

PR rtl-optimization/117248
* lra-lives.cc (start_living, start_dying): Remove.
(insn_regnos, out_insn_regnos, insn_regnos_live_after): New.
(sparseset_contains_pseudos_p): Remove.
(make_hard_regno_live, make_hard_regno_dead): Return true if
something in liveness is changed.
(mark_pseudo_live,  mark_pseudo_dead): Ditto.
(mark_regno_live, mark_regno_dead): Ditto.
(clear_sparseset_regnos, regnos_in_sparseset_p): Use set instead
of dead_set.
(process_bb_lives): Rewrite dealing with reg notes.  Update
conflict hard regs even when clobber hard reg is not marked as
dead.
(lra_create_live_ranges_1): Add initialization/finalization of
insn_regnos, out_insn_regnos, insn_regnos_live_after.

Diff:
---
 gcc/lra-lives.cc | 284 ++-
 1 file changed, 177 insertions(+), 107 deletions(-)

diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc
index 49134ade713d..f1bb5701bc4f 100644
--- a/gcc/lra-lives.cc
+++ b/gcc/lra-lives.cc
@@ -83,10 +83,10 @@ static sparseset pseudos_live_through_setjumps;
 /* Set of hard regs (except eliminable ones) currently live.  */
 static HARD_REG_SET hard_regs_live;
 
-/* Set of pseudos and hard registers start living/dying in the current
-   insn.  These sets are used to update REG_DEAD and REG_UNUSED notes
-   in the insn.  */
-static sparseset start_living, start_dying;
+/* Set of pseudos and hard registers in the current insn, only out/inout ones,
+   and the current insn pseudos and hard registers living right after the
+   insn.  */
+static sparseset insn_regnos, out_insn_regnos, insn_regnos_live_after;
 
 /* Set of pseudos and hard regs dead and unused in the current
insn.  */
@@ -227,17 +227,6 @@ enum point_type {
   USE_POINT
 };
 
-/* Return TRUE if set A contains a pseudo register, otherwise, return FALSE.  
*/
-static bool
-sparseset_contains_pseudos_p (sparseset a)
-{
-  int regno;
-  EXECUTE_IF_SET_IN_SPARSESET (a, regno)
-if (!HARD_REGISTER_NUM_P (regno))
-  return true;
-  return false;
-}
-
 /* Mark pseudo REGNO as living or dying at program point POINT, depending on
whether TYPE is a definition or a use.  If this is the first reference to
REGNO that we've encountered, then create a new live range for it.  */
@@ -276,29 +265,29 @@ update_pseudo_point (int regno, int point, enum 
point_type type)
 /* The corresponding bitmaps of BB currently being processed.  */
 static bitmap bb_killed_pseudos, bb_gen_pseudos;
 
-/* Record hard register REGNO as now being live.  It updates
-   living hard regs and START_LIVING.  */
-static void
+/* Record hard register REGNO as now being live.  Return true if REGNO liveness
+   changes.  */
+static bool
 make_hard_regno_live (int regno)
 {
   lra_assert (HARD_REGISTER_NUM_P (regno));
   if (TEST_HARD_REG_BIT (hard_regs_live, regno)
   || TEST_HARD_REG_BIT (eliminable_regset, regno))
-return;
+return false;
   SET_HARD_REG_BIT (hard_regs_live, regno);
-  sparseset_set_bit (start_living, regno);
   if (fixed_regs[regno] || TEST_HARD_REG_BIT (hard_regs_spilled_into, regno))
 bitmap_set_bit (bb_gen_pseudos, regno);
+  return true;
 }
 
-/* Process the definition of hard register REGNO.  This updates
-   hard_regs_live, START_DYING and conflict hard regs for living
-   pseudos.  */
-static void
+/* Process the definition of hard register REGNO.  This updates hard_regs_live
+   and conflict hard regs for living pseudos.  Return true if REGNO liveness
+   changes.  */
+static bool
 make_hard_regno_dead (int regno)
 {
   if (TEST_HARD_REG_BIT (eliminable_regset, regno))
-return;
+return false;
 
   lra_assert (HARD_REGISTER_NUM_P (regno));
   unsigned int i;
@@ -306,79 +295,89 @@ make_hard_regno_dead (int regno)
 SET_HARD_REG_BIT (lra_reg_info[i].conflict_hard_regs, regno);
 
   if (! TEST_HARD_REG_BIT (hard_regs_live, regno))
-return;
+return false;
   CLEAR_HARD_REG_BIT (hard_regs_live, regno);
-  sparseset_set_bit (start_dying, regno);
   if (fixed_regs[regno] || TEST_HARD_REG_BIT (hard_regs_spilled_into, regno))
 {

[gcc r15-6751] [PR118017][LRA]: Don't inherit reg of non-uniform reg class

2025-01-09 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:fab96de044f1f023f52d43af866205d17d8895fb

commit r15-6751-gfab96de044f1f023f52d43af866205d17d8895fb
Author: Vladimir N. Makarov 
Date:   Thu Jan 9 16:22:02 2025 -0500

[PR118017][LRA]: Don't inherit reg of non-uniform reg class

In the PR case LRA inherited the value of a register of class INT_SSE_REGS,
which resulted in LRA cycling when LRA tried to use different move
alternatives with SSE/general regs and memory.  The patch refuses to
inherit registers of such (non-uniform) classes to prevent the cycling.

gcc/ChangeLog:

PR target/118017
* lra-constraints.cc (inherit_reload_reg): Check reg class on 
uniformity.

gcc/testsuite/ChangeLog:

PR target/118017
* gcc.target/i386/pr118017.c: New.

Diff:
---
 gcc/lra-constraints.cc   | 14 ++
 gcc/testsuite/gcc.target/i386/pr118017.c | 21 +
 2 files changed, 35 insertions(+)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index a0f05b290dde..8f32e98f1c47 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5878,6 +5878,20 @@ inherit_reload_reg (bool def_p, int original_regno,
}
   return false;
 }
+  if (ira_reg_class_min_nregs[rclass][GET_MODE (original_reg)]
+  != ira_reg_class_max_nregs[rclass][GET_MODE (original_reg)])
+{
+  if (lra_dump_file != NULL)
+   {
+ fprintf (lra_dump_file,
+  "Rejecting inheritance for %d "
+  "because of requiring non-uniform class %s\n",
+  original_regno, reg_class_names[rclass]);
+ fprintf (lra_dump_file,
+  ">>\n");
+   }
+  return false;
+}
   new_reg = lra_create_new_reg (GET_MODE (original_reg), original_reg,
rclass, NULL, "inheritance");
   start_sequence ();
diff --git a/gcc/testsuite/gcc.target/i386/pr118017.c 
b/gcc/testsuite/gcc.target/i386/pr118017.c
new file mode 100644
index ..c82d71e8d293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118017.c
@@ -0,0 +1,21 @@
+/* PR target/118017 */
+/* { dg-do compile } */
+/* { dg-options "-Og -frounding-math -mno-80387 -mno-mmx -Wno-psabi" } */
+
+typedef __attribute__((__vector_size__ (64))) _Float128 F;
+typedef __attribute__((__vector_size__ (64))) _Decimal64 G;
+typedef __attribute__((__vector_size__ (64))) _Decimal128 H;
+
+void
+bar(_Float32, _BitInt(1025), _BitInt(1025), _Float128, __int128, __int128,  F,
+int, int, G, _Float64, __int128, __int128, H, F);
+
+
+void
+foo ()
+{
+  bar ((__int128)68435455, 0, 0, 0, 0, 0, (F){}, 0, 0, 
(G){3689348814741910323},
+   0, 0, 0, (H){0, (_Decimal128) ((__int128) 860933398830926 << 64),
+   (_Decimal128) ((__int128) 966483857959145 << 64), 4},
+   (F){(__int128) 3689348814741910323 << 64 | 3});
+}


[gcc r15-7185] [PR118497][IRA]: Fix calculation of cost of assigning callee-saved hard reg

2025-01-24 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:c4dae80357ccf2e035d8e9ec0a3bb319344c5b41

commit r15-7185-gc4dae80357ccf2e035d8e9ec0a3bb319344c5b41
Author: Vladimir N. Makarov 
Date:   Fri Jan 24 13:16:53 2025 -0500

[PR118497][IRA]: Fix calculation of cost of assigning callee-saved hard reg

  Assembler code generated by GCC for PR118497 contains an unnecessary
move insn.  This happened as IRA assigns the AX reg to a pseudo which
should be in the BX reg later for a call.  The pseudo did not get BX as
LRA decided that BX would need to be saved although BX will be saved
anyway.  The patch fixes the cost calculation.  Usage of hard reg
nrefs from regstat or DF would result in numerous failures as such
nrefs include artificial reg refs.  Therefore we add a calculation of
hard reg nrefs in IRA.  Also we change the regexp used for scanning the
assembler in test vartrack-1.c as with the patch LRA assigns
callee-saved hard reg BP instead of another callee-saved hard reg BX
expected by the test.

gcc/ChangeLog:

PR target/118497
* ira-int.h (target_ira_int): Add x_ira_hard_regno_nrefs.
(ira_hard_regno_nrefs): New macro.
* ira.cc (setup_hard_regno_aclass): Remove unused code.  Modify
the comment.
(setup_hard_regno_nrefs): New function.
(ira): Call it.
* ira-color.cc (calculate_saved_nregs): Check
ira_hard_regno_nrefs.

gcc/testsuite/ChangeLog:

PR target/118497
* gcc.target/i386/pr118497.c: New.
* gcc.target/i386/vartrack-1.c: Modify the regexp.

Diff:
---
 gcc/ira-color.cc   |  1 +
 gcc/ira-int.h  |  5 
 gcc/ira.cc | 42 ++
 gcc/testsuite/gcc.target/i386/pr118497.c   | 16 
 gcc/testsuite/gcc.target/i386/vartrack-1.c | 12 -
 5 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 23f68c007573..0699b349a1af 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -1752,6 +1752,7 @@ calculate_saved_nregs (int hard_regno, machine_mode mode)
   ira_assert (hard_regno >= 0);
   for (i = hard_regno_nregs (hard_regno, mode) - 1; i >= 0; i--)
 if (!allocated_hardreg_p[hard_regno + i]
+   && ira_hard_regno_nrefs[hard_regno + i] == 0
&& !crtl->abi->clobbers_full_reg_p (hard_regno + i)
&& !LOCAL_REGNO (hard_regno + i))
   nregs++;
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index aa8432416fce..49e086e4d4b1 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -936,6 +936,9 @@ public:
 
   /* Flag of that the above array has been initialized.  */
   bool x_ira_prohibited_mode_move_regs_initialized_p;
+
+  /* Number of real occurences of hard regs before IRA.  */
+  size_t x_ira_hard_regno_nrefs[FIRST_PSEUDO_REGISTER];
 };
 
 extern class target_ira_int default_target_ira_int;
@@ -983,6 +986,8 @@ extern class target_ira_int *this_target_ira_int;
   (this_target_ira_int->x_ira_reg_class_superunion)
 #define ira_prohibited_mode_move_regs \
   (this_target_ira_int->x_ira_prohibited_mode_move_regs)
+#define ira_hard_regno_nrefs \
+  (this_target_ira_int->x_ira_hard_regno_nrefs)
 
 /* ira.cc: */
 
diff --git a/gcc/ira.cc b/gcc/ira.cc
index ad522b00f8b5..885239d1b43c 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -1416,7 +1416,7 @@ find_reg_classes (void)
 
 
 
-/* Set up the array above.  */
+/* Set up array ira_hard_regno_allocno_class.  */
 static void
 setup_hard_regno_aclass (void)
 {
@@ -1424,25 +1424,10 @@ setup_hard_regno_aclass (void)
 
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 {
-#if 1
   ira_hard_regno_allocno_class[i]
= (TEST_HARD_REG_BIT (no_unit_alloc_regs, i)
   ? NO_REGS
   : ira_allocno_class_translate[REGNO_REG_CLASS (i)]);
-#else
-  int j;
-  enum reg_class cl;
-  ira_hard_regno_allocno_class[i] = NO_REGS;
-  for (j = 0; j < ira_allocno_classes_num; j++)
-   {
- cl = ira_allocno_classes[j];
- if (ira_class_hard_reg_index[cl][i] >= 0)
-   {
- ira_hard_regno_allocno_class[i] = cl;
- break;
-   }
-   }
-#endif
 }
 }
 
@@ -5549,6 +5534,30 @@ static int saved_flag_ira_share_spill_slots;
 /* Set to true while in IRA.  */
 bool ira_in_progress = false;
 
+/* Set up array ira_hard_regno_nrefs.  */
+static void
+setup_hard_regno_nrefs (void)
+{
+  int i;
+
+  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+{
+  ira_hard_regno_nrefs[i] = 0;
+  for (df_ref use = DF_REG_USE_CHAIN (i);
+  use != NULL;
+  use = DF_REF_NEXT_REG (use))
+   if (DF_REF_CLASS (use) != DF_REF_ARTIFICIAL
+   && !(DF_REF_INSN_INFO (use) && DEBUG_INSN_P (DF_REF_INSN (use
+ ira_hard_regno_nrefs[i]++;
+  for (df_ref def = DF_REG_DEF_CHAIN (i);
+  def != NULL;
+  def = DF_REF_NEXT_REG (def))
+  

[gcc r15-7008] [PR118067][LRA]: Check secondary memory mode for the reg class

2025-01-17 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:9f009e8865cda01310c52f7ec8bdaa3c557a2745

commit r15-7008-g9f009e8865cda01310c52f7ec8bdaa3c557a2745
Author: Vladimir N. Makarov 
Date:   Fri Jan 17 15:56:29 2025 -0500

[PR118067][LRA]: Check secondary memory mode for the reg class

  This is the second patch for the PR, for the new test.  The patch
solves the problem in the case when the secondary memory mode (SImode in
the PR test) returned by hook secondary_memory_needed_mode cannot be used
for the reg class (ALL_MASK_REGS) involved in secondary memory moves.  In
this case the patch uses the reg mode instead of the one returned by
secondary_memory_needed_mode.

gcc/ChangeLog:

PR rtl-optimization/118067
* lra-constraints.cc (invalid_mode_reg_p): New function.
(curr_insn_transform): Use it to check mode returned by target
secondary_memory_needed_mode.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr118067-2.c: New.

Diff:
---
 gcc/lra-constraints.cc | 17 +
 gcc/testsuite/gcc.target/i386/pr118067-2.c | 16 
 2 files changed, 33 insertions(+)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 3d5abcfaeb0b..cd19da294db3 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4129,6 +4129,19 @@ swap_operands (int nop)
   lra_update_dup (curr_id, nop + 1);
 }
 
+/* Return TRUE if X is a (subreg of) reg and there are no hard regs of X class
+   which can contain value of MODE.  */
+static bool invalid_mode_reg_p (enum machine_mode mode, rtx x)
+{
+  if (SUBREG_P (x))
+x = SUBREG_REG (x);
+  if (! REG_P (x))
+return false;
+  enum reg_class rclass = get_reg_class (REGNO (x));
+  return hard_reg_set_subset_p (ira_prohibited_class_mode_regs[rclass][mode],
+   reg_class_contents[rclass]);
+}
+
 /* Main entry point of the constraint code: search the body of the
current insn to choose the best alternative.  It is mimicking insn
alternative cost calculation model of former reload pass.  That is
@@ -4389,6 +4402,10 @@ curr_insn_transform (bool check_only_p)
   rld = partial_subreg_p (GET_MODE (src), GET_MODE (dest)) ? src : dest;
   rld_mode = GET_MODE (rld);
   sec_mode = targetm.secondary_memory_needed_mode (rld_mode);
+  if (rld_mode != sec_mode
+ && (invalid_mode_reg_p (sec_mode, dest)
+ || invalid_mode_reg_p (sec_mode, src)))
+   sec_mode = rld_mode;
   new_reg = lra_create_new_reg (sec_mode, NULL_RTX, NO_REGS, NULL,
"secondary");
   /* If the mode is changed, it should be wider.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr118067-2.c 
b/gcc/testsuite/gcc.target/i386/pr118067-2.c
new file mode 100644
index ..831871db0b43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118067-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O -fno-split-wide-types -mavx512f -mcpu=k8" } */
+
+typedef unsigned short U __attribute__((__vector_size__(64)));
+typedef int V __attribute__((__vector_size__(64)));
+typedef __int128 W __attribute__((__vector_size__(64)));
+
+W
+foo(U u, V v)
+{
+  W w;
+  /* __asm__ volatile ("" : "=v"(w)); prevents the -Wuninitialized warning */
+  u[0] >>= 1;
+  v %= (V)w;
+  return (W)u + (W)v;
+}


[gcc r15-7083] [PR118560][LRA]: Fix typo in checking secondary memory mode for the reg class

2025-01-20 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:07f62ed9a7b09951f83855e19d41641b098190b1

commit r15-7083-g07f62ed9a7b09951f83855e19d41641b098190b1
Author: Vladimir N. Makarov 
Date:   Mon Jan 20 17:08:50 2025 -0500

[PR118560][LRA]: Fix typo in checking secondary memory mode for the reg 
class

  The patch for PR118067 called hard_reg_set_subset_p with its arguments in
the wrong order.  It worked when the two sets are equal, as in PR118067, but
it was wrong in other cases, as in PR118560 (inordinate compile time).

gcc/ChangeLog:

PR target/118560
* lra-constraints.cc (invalid_mode_reg_p): Exchange args in
hard_reg_set_subset_p call.

Diff:
---
 gcc/lra-constraints.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index cd19da294db3..797222c9fbc2 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4138,8 +4138,8 @@ static bool invalid_mode_reg_p (enum machine_mode mode, 
rtx x)
   if (! REG_P (x))
 return false;
   enum reg_class rclass = get_reg_class (REGNO (x));
-  return hard_reg_set_subset_p (ira_prohibited_class_mode_regs[rclass][mode],
-   reg_class_contents[rclass]);
+  return hard_reg_set_subset_p (reg_class_contents[rclass],
+   ira_prohibited_class_mode_regs[rclass][mode]);
 }
 
 /* Main entry point of the constraint code: search the body of the


[gcc r15-7250] [PR118663][LRA]: Change secondary memory mode only if there are regs holding the changed mode

2025-01-28 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:01339d29b7663d85eea6145eac2b1ad1da428c11

commit r15-7250-g01339d29b7663d85eea6145eac2b1ad1da428c11
Author: Vladimir N. Makarov 
Date:   Tue Jan 28 08:37:33 2025 -0500

[PR118663][LRA]: Change secondary memory mode only if there are regs 
holding the changed mode

  My recent patch for PR118067 changes the secondary memory mode if
all regs of the pseudo reg class are prohibited in the secondary mode.
But that patch does not check the special case when the
corresponding target hook returns this mode although there are no hard
regs of the pseudo class holding a value of the mode at all.  This results
in the given PR, and this patch fixes it.

gcc/ChangeLog:

PR target/118663
* lra-constraints.cc (invalid_mode_reg_p): Check empty
reg_class_contents.

gcc/testsuite/ChangeLog:

PR target/118663
* gcc.target/powerpc/pr118663.c: New.

Diff:
---
 gcc/lra-constraints.cc  |  6 --
 gcc/testsuite/gcc.target/powerpc/pr118663.c | 10 ++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 797222c9fbc2..ee3fd7a503aa 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4138,8 +4138,10 @@ static bool invalid_mode_reg_p (enum machine_mode mode, 
rtx x)
   if (! REG_P (x))
 return false;
   enum reg_class rclass = get_reg_class (REGNO (x));
-  return hard_reg_set_subset_p (reg_class_contents[rclass],
-   ira_prohibited_class_mode_regs[rclass][mode]);
+  return (!hard_reg_set_empty_p (reg_class_contents[rclass])
+ && hard_reg_set_subset_p
+(reg_class_contents[rclass],
+ ira_prohibited_class_mode_regs[rclass][mode]));
 }
 
 /* Main entry point of the constraint code: search the body of the
diff --git a/gcc/testsuite/gcc.target/powerpc/pr118663.c 
b/gcc/testsuite/gcc.target/powerpc/pr118663.c
new file mode 100644
index ..8d3cbe07fc1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr118663.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-mcpu=601 -w -O2 -m64" } */
+
+extern void bar (void);
+void
+foo (_Decimal32 *dst, _Decimal32 src)
+{
+  bar ();
+  *dst = src;
+}


[gcc r15-6968] [PR118067][LRA]: Use the right mode to evaluate secondary memory reload

2025-01-16 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:d9835825b3d7193b3d6669174f4386be2cb1

commit r15-6968-gd9835825b3d7193b3d6669174f4386be2cb1
Author: Vladimir N. Makarov 
Date:   Thu Jan 16 12:17:31 2025 -0500

[PR118067][LRA]: Use the right mode to evaluate secondary memory reload

  In the PR case, LRA made an insn alternative costly.  It happened
because LRA incorrectly found that the alternative needs a secondary memory
reload, as the wrong mode was used for targetm.secondary_memory_needed.
This resulted in LRA cycling, as an alternative with mask regs was
chosen.  The patch fixes the PR and adds more debug printing, which
could be useful in the future for debugging function
process_alt_operands.

gcc/ChangeLog:

PR rtl-optimization/118067
* lra-constraints.cc (process_alt_operands): Use operand mode not
subreg reg mode.  Add and improve debugging prints for updating
losers.

gcc/testsuite/ChangeLog:

PR rtl-optimization/118067
* gcc.target/i386/pr118067.c: New.

Diff:
---
 gcc/lra-constraints.cc   | 41 +++-
 gcc/testsuite/gcc.target/i386/pr118067.c | 16 +
 2 files changed, 51 insertions(+), 6 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 8f32e98f1c47..3d5abcfaeb0b 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2465,6 +2465,11 @@ process_alt_operands (int only_alternative)
&& (operand_reg[m] == NULL_RTX
|| hard_regno[m] < 0))
  {
+   if (lra_dump_file != NULL)
+ fprintf
+   (lra_dump_file,
+"%d Matched operand reload: "
+"losers++\n", m);
losers++;
reload_nregs
  += (ira_reg_class_max_nregs[curr_alt[m]]
@@ -2909,6 +2914,10 @@ process_alt_operands (int only_alternative)
   "Strict low subreg reload -- refuse\n");
  goto fail;
}
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "%d Operand reload: losers++\n", nop);
  losers++;
}
  if (operand_reg[nop] != NULL_RTX
@@ -2945,7 +2954,14 @@ process_alt_operands (int only_alternative)
{
  const_to_mem = 1;
  if (! no_regs_p)
-   losers++;
+   {
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "%d Constant reload through memory: "
+  "losers++\n", nop);
+ losers++;
+   }
}
 
  /* Alternative loses if it requires a type of reload not
@@ -3127,12 +3143,19 @@ process_alt_operands (int only_alternative)
  if (this_alternative != NO_REGS
  && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
  && ((curr_static_id->operand[nop].type != OP_OUT
-  && targetm.secondary_memory_needed (GET_MODE (op), cl,
+  && targetm.secondary_memory_needed (mode, cl,
   this_alternative))
  || (curr_static_id->operand[nop].type != OP_IN
  && (targetm.secondary_memory_needed
- (GET_MODE (op), this_alternative, cl)
-   losers++;
+ (mode, this_alternative, cl)
+   {
+ if (lra_dump_file != NULL)
+   fprintf
+ (lra_dump_file,
+  "%d Secondary memory reload needed: "
+  "losers++\n", nop);
+ losers++;
+   }
 
  if (MEM_P (op) && offmemok)
addr_losers++;
@@ -3346,7 +3369,7 @@ process_alt_operands (int only_alternative)
  if (lra_dump_file != NULL)
fprintf
  (lra_dump_file,
-  "%d Conflict early clobber reload: reject--\n",
+  "%d Conflict early clobber reload: losers++\n",
   i);
}
  else
@@ -3358,6 +3381,12 @@ process_alt_operands (int only_alternative)
  {
curr_alt_match_win[j] = false;
losers++;
+   if (lra_dump_file != NULL)
+ fprintf
+   (lra_dump_file,
+"%d Matchi

[gcc r15-6796] [PR118017][LRA]: Fix test for i686

2025-01-10 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:94d8de53388793f4d5fc0d0aa00fef32ca4aa870

commit r15-6796-g94d8de53388793f4d5fc0d0aa00fef32ca4aa870
Author: Vladimir N. Makarov 
Date:   Fri Jan 10 10:36:24 2025 -0500

[PR118017][LRA]: Fix test for i686

My previous patch for PR118017 contains a test which fails on i686.  The 
patch fixes this.

gcc/testsuite/ChangeLog:

PR target/118017
* gcc.target/i386/pr118017.c: Check target int128.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr118017.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr118017.c 
b/gcc/testsuite/gcc.target/i386/pr118017.c
index c82d71e8d293..28797a0ad73f 100644
--- a/gcc/testsuite/gcc.target/i386/pr118017.c
+++ b/gcc/testsuite/gcc.target/i386/pr118017.c
@@ -1,5 +1,5 @@
 /* PR target/118017 */
-/* { dg-do compile } */
+/* { dg-do compile { target int128 } } */
 /* { dg-options "-Og -frounding-math -mno-80387 -mno-mmx -Wno-psabi" } */
 
 typedef __attribute__((__vector_size__ (64))) _Float128 F;


[gcc r15-7716] [PR119021][LRA]: Fix rtl correctness check failure in LRA.

2025-02-26 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:7ce3a8e872d945d537a7e7ba1bd3f45b1cf9a6d2

commit r15-7716-g7ce3a8e872d945d537a7e7ba1bd3f45b1cf9a6d2
Author: Vladimir N. Makarov 
Date:   Wed Feb 26 11:28:08 2025 -0500

[PR119021][LRA]: Fix rtl correctness check failure in LRA.

  The patch to fix PR115458 contained a code change in dealing with asm
errors to avoid cycling in reporting the error for asm gotos.  This
code was wrong and resulted in an RTL correctness check failure.  This
patch reverts the code change and solves the cycling in asm error
reporting in a different way.

gcc/ChangeLog:

PR middle-end/119021
* lra.cc (lra_asm_insn_error): Use lra_invalidate_insn_data
instead of lra_update_insn_regno_info.
* lra-assigns.cc (lra_split_hard_reg_for): Restore old code.

Diff:
---
 gcc/lra-assigns.cc | 5 -
 gcc/lra.cc | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index 480925ad8943..46f9c9d20e25 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1856,11 +1856,6 @@ lra_split_hard_reg_for (bool fail_p)
  {
asm_p = true;
lra_asm_insn_error (insn);
-   if (JUMP_P (insn))
- ira_nullify_asm_goto (insn);
-   else
- PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
-   lra_invalidate_insn_data (insn);
  }
else if (!asm_p)
  {
diff --git a/gcc/lra.cc b/gcc/lra.cc
index b753729d43d9..8f30284e9daa 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -549,7 +549,7 @@ lra_asm_insn_error (rtx_insn *insn)
   if (JUMP_P (insn))
 {
   ira_nullify_asm_goto (insn);
-  lra_update_insn_regno_info (insn);
+  lra_invalidate_insn_data (insn);
 }
   else
 {


[gcc r15-7700] [PR115458][LRA]: Run split sub-pass more times

2025-02-25 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:2341f675edadd6370147d2bc55ca7761a7ecfaa1

commit r15-7700-g2341f675edadd6370147d2bc55ca7761a7ecfaa1
Author: Vladimir N. Makarov 
Date:   Tue Feb 25 15:01:15 2025 -0500

[PR115458][LRA]: Run split sub-pass more times

  In this PR case LRA needs to provide too many hard regs for insn
reloads, where some reload pseudos require 8 aligned regs for
themselves.  As the last attempt, LRA tries to split live ranges of
hard regs for insn reload pseudos.  It is a very rare case.  An
inheritance pseudo involving a reload pseudo of the insn can be
spilled in the assignment sub-pass run right after splitting and we need
to run split sub-pass for the inheritance pseudo now.

gcc/ChangeLog:

PR target/115458
* lra-int.h (LRA_MAX_FAILED_SPLITS): Define and check its value.
(lra_split_hard_reg_for): Change prototype.
* lra.cc (lra): Try to split hard reg range several times after a
failure.
* lra-assigns.cc (lra_split_hard_reg_for): Add an arg, a flag of
giving up.  Report asm error and nullify the asm insn depending on
the arg value.

gcc/testsuite/ChangeLog:

PR target/115458
* g++.target/riscv/pr115458.C: New.

Diff:
---
 gcc/lra-assigns.cc|  50 +++--
 gcc/lra-int.h |  14 +-
 gcc/lra.cc|  14 +-
 gcc/testsuite/g++.target/riscv/pr115458.C | 357 ++
 4 files changed, 410 insertions(+), 25 deletions(-)

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index f9e3dfc3d5af..480925ad8943 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1763,12 +1763,13 @@ find_reload_regno_insns (int regno, rtx_insn * &start, 
rtx_insn * &finish)
   return true;
 }
 
-/* Process reload pseudos which did not get a hard reg, split a hard
-   reg live range in live range of a reload pseudo, and then return
-   TRUE.  If we did not split a hard reg live range, report an error,
-   and return FALSE.  */
+/* Process reload pseudos which did not get a hard reg, split a hard reg live
+   range in live range of a reload pseudo, and then return TRUE.  Otherwise,
+   return FALSE.  When FAIL_P is TRUE and if we did not split a hard reg live
+   range for failed reload pseudos, report an error and modify related asm
+   insns.  */
 bool
-lra_split_hard_reg_for (void)
+lra_split_hard_reg_for (bool fail_p)
 {
   int i, regno;
   rtx_insn *insn, *first, *last;
@@ -1843,23 +1844,30 @@ lra_split_hard_reg_for (void)
   regno = u;
   bitmap_ior_into (&failed_reload_insns,
   &lra_reg_info[regno].insn_bitmap);
-  lra_setup_reg_renumber
-   (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
-}
-  EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
-{
-  insn = lra_insn_recog_data[u]->insn;
-  if (asm_noperands (PATTERN (insn)) >= 0)
-   {
- asm_p = true;
- lra_asm_insn_error (insn);
-   }
-  else if (!asm_p)
-   {
- error ("unable to find a register to spill");
- fatal_insn ("this is the insn:", insn);
-   }
+  if (fail_p)
+   lra_setup_reg_renumber
+ (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
 }
+  if (fail_p)
+EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
+  {
+   insn = lra_insn_recog_data[u]->insn;
+   if (asm_noperands (PATTERN (insn)) >= 0)
+ {
+   asm_p = true;
+   lra_asm_insn_error (insn);
+   if (JUMP_P (insn))
+ ira_nullify_asm_goto (insn);
+   else
+ PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
+   lra_invalidate_insn_data (insn);
+ }
+   else if (!asm_p)
+ {
+   error ("unable to find a register to spill");
+   fatal_insn ("this is the insn:", insn);
+ }
+  }
   bitmap_clear (&failed_reload_pseudos);
   bitmap_clear (&failed_reload_insns);
   return false;
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 1f89e069c4f9..ad42f48cc822 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -252,6 +252,18 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
for preventing LRA cycling in a bug case.  */
 #define LRA_MAX_ASSIGNMENT_ITERATION_NUMBER 30
 
+/* Maximum allowed number of tries to split hard reg live ranges after failure
+   in assignment of reload pseudos.  Theoretical bound for the value is the
+   number of the insn reload pseudos plus the number of inheritance pseudos
+   generated from the reload pseudos.  This bound can be achieved when all the
+   reload pseudos and the inheritance pseudos require hard reg splitting for
+   their assignment.  This is extremely unlikely event.  */
+#define LRA_MAX_FAILED_SPLITS 10
+
+#if LRA_MAX_FAILED_SPLITS >= LRA_MAX_ASSIGNMENT_ITERATION_NUMBER
+#err

[gcc r15-7932] [PR114991][IRA]: Improve reg equiv invariant calculation

2025-03-11 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:e355fe414aa3aaf215c7dd9dd789ce217a1b458c

commit r15-7932-ge355fe414aa3aaf215c7dd9dd789ce217a1b458c
Author: Vladimir N. Makarov 
Date:   Mon Mar 10 16:26:59 2025 -0400

[PR114991][IRA]: Improve reg equiv invariant calculation

In PR test case IRA preferred to allocate hard reg to a pseudo instead
of its equivalence.  This resulted in allocating caller-saved hard reg
and generating save/restore insns in the function prologue/epilogue.
The equivalence is an invariant (stack pointer plus offset) and the
pseudo is used mostly as memory address.  This happened as there was
no simplification of insn after the invariant substitution.  The patch
adds the necessary code.

gcc/ChangeLog:

PR target/114991
* ira-costs.cc (equiv_can_be_consumed_p): Add new argument 
invariant_p.
Add code for dealing with the invariant.
(calculate_equiv_gains): Don't consider init insns.  Pass the new
argument to equiv_can_be_consumed_p.  Don't treat invariant as
memory.

gcc/testsuite/ChangeLog:

PR target/114991
* gcc.target/aarch64/pr114991.c: New test.

Diff:
---
 gcc/ira-costs.cc| 38 +
 gcc/testsuite/gcc.target/aarch64/pr114991.c | 15 
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index a404e9f2690f..b568c7d03267 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -1788,11 +1788,27 @@ validate_autoinc_and_mem_addr_p (rtx x)
  MEM_ADDR_SPACE (x)));
 }
 
-/* Check that reg REGNO can be changed by TO in INSN.  Return true in case the
-   result insn would be valid one.  */
+/* Check that reg REGNO in INSN can be changed by TO (which is an invariant
+   equiv when INVARIANT_P is true).  Return true in case the result insn would
+   be valid one.  */
 static bool
-equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn, bool invariant_p)
 {
+  if (invariant_p)
+{
+  /* We use more expensive code for the invariant because we need to
+simplify the result insn as the invariant can be arithmetic rtx
+inserted into another arithmetic rtx.  */
+  rtx pat = PATTERN (insn);
+  int code = INSN_CODE (insn);
+  PATTERN (insn) = copy_rtx (pat);
+  PATTERN (insn)
+   = simplify_replace_rtx (PATTERN (insn), regno_reg_rtx[regno], to);
+  bool res = !insn_invalid_p (insn, false);
+  PATTERN (insn) = pat;
+  INSN_CODE (insn) = code;
+  return res;
+}
   validate_replace_src_group (regno_reg_rtx[regno], to, insn);
   /* We can change register to equivalent memory in autoinc rtl.  Some code
  including verify_changes assumes that autoinc contains only a register.
@@ -1910,6 +1926,14 @@ calculate_equiv_gains (void)
  || !get_equiv_regno (PATTERN (insn), regno, subreg)
  || !bitmap_bit_p (&equiv_pseudos, regno))
continue;
+
+ rtx_insn_list *x;
+ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
+   if (insn == x->insn ())
+ break;
+ if (x != NULL)
+   continue; /* skip equiv init insn */
+
  rtx subst = ira_reg_equiv[regno].memory;
 
  if (subst == NULL)
@@ -1919,13 +1943,17 @@ calculate_equiv_gains (void)
  ira_assert (subst != NULL);
  mode = PSEUDO_REGNO_MODE (regno);
  ira_init_register_move_cost_if_necessary (mode);
- bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
+ bool consumed_p
+   = equiv_can_be_consumed_p (regno, subst, insn,
+  subst == ira_reg_equiv[regno].invariant);
 
  rclass = pref[COST_INDEX (regno)];
  if (MEM_P (subst)
  /* If it is a change of constant into double for example, the
 result constant probably will be placed in memory.  */
- || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg
+ || (ira_reg_equiv[regno].invariant == NULL
+ && subreg != NULL_RTX
+ && !INTEGRAL_MODE_P (GET_MODE (subreg
cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
  else if (consumed_p)
continue;
diff --git a/gcc/testsuite/gcc.target/aarch64/pr114991.c 
b/gcc/testsuite/gcc.target/aarch64/pr114991.c
new file mode 100644
index ..d3c7bd131ddc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr114991.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-shrink-wrap" } */
+
+typedef struct { int arr[16]; } S;
+
+void g (S *);
+void h (S);
+void f(int x)
+{
+  S s;
+  g (&s);
+  h (s);
+}
+
+/* { dg-final { scan-assembler-not "\[ \t\]?str\[ \t\]x" } } */


[gcc r15-7730] [PR118940][LRA]: Add a test

2025-02-27 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:3071eb2848a2e748cfd67e8c897890ce06c69d06

commit r15-7730-g3071eb2848a2e748cfd67e8c897890ce06c69d06
Author: Vladimir N. Makarov 
Date:   Thu Feb 27 13:39:04 2025 -0500

[PR118940][LRA]: Add a test

The patch for PR115458 also solves the given PR.  So this patch adds only a
test case, which can be used for testing aspects of LRA work different from
the PR115458 test case.

gcc/testsuite/ChangeLog:

PR target/118940
* gcc.target/i386/pr118940.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/i386/pr118940.c | 127 +++
 1 file changed, 127 insertions(+)

diff --git a/gcc/testsuite/gcc.target/i386/pr118940.c 
b/gcc/testsuite/gcc.target/i386/pr118940.c
new file mode 100644
index ..be094310173c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118940.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -Os -march=i386 -mregparm=3 -m32 -fno-PIE" } */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned int size_t;
+typedef uint32_t bigint_element_t;
+
+/**
+ * Define a big-integer type
+ *
+ * @v size Number of elements
+ * @ret bigint_t   Big integer type
+ */
+ #define bigint_t( size )  \
+ struct {  \
+  bigint_element_t element[ (size) ];  \
+ }
+
+/**
+* Determine number of elements required for a big-integer type
+*
+* @v len   Maximum length of big integer, in bytes
+* @ret sizeNumber of elements
+*/
+#define bigint_required_size( len )\
+ ( ( (len) + sizeof ( bigint_element_t ) - 1 ) /   \
+   sizeof ( bigint_element_t ) )
+
+/**
+ * Determine number of elements in big-integer type
+ *
+ * @v bigint   Big integer
+ * @ret size   Number of elements
+ */
+ #define bigint_size( bigint ) \
+ ( sizeof ( *(bigint) ) / sizeof ( (bigint)->element[0] ) )
+
+ /**
+ * Initialise big integer
+ *
+ * @v valueBig integer to initialise
+ * @v data Raw data
+ * @v len  Length of raw data
+ */
+#define bigint_init( value, data, len ) do {   \
+   unsigned int size = bigint_size (value);\
+   bigint_init_raw ( (value)->element, size, (data), (len) );  \
+   } while ( 0 )
+
+
+/**
+ * Calculate temporary working space required for moduluar exponentiation
+ *
+ * @v modulus  Big integer modulus
+ * @ret lenLength of temporary working space
+ */
+ #define bigint_mod_exp_tmp_len( modulus ) ( { \
+   unsigned int size = bigint_size (modulus);  \
+   sizeof ( struct {   \
+   bigint_t ( size ) temp[4];  \
+   } ); } )
+
+
+/**
+ * Initialise big integer
+ *
+ * @v value0   Element 0 of big integer to initialise
+ * @v size Number of elements
+ * @v data Raw data
+ * @v len  Length of raw data
+ */
+ static inline __attribute__ (( always_inline )) void
+ bigint_init_raw ( uint32_t *value0, unsigned int size,
+ const void *data, size_t len ) {
+  bigint_t ( size ) __attribute__ (( may_alias )) *value =
+   ( ( void * ) value0 );
+  long pad_len = ( sizeof ( *value ) - len );
+  void *discard_D;
+  long discard_c;
+
+  /* Copy raw data in reverse order, padding with zeros */
+  __asm__ __volatile__ ( "\n1:\n\t"
+   "movb -1(%3,%1), %%al\n\t"
+   "stosb\n\t"
+   "loop 1b\n\t"
+   "xorl %%eax, %%eax\n\t"
+   "mov %4, %1\n\t"
+   "rep stosb\n\t"
+   : "=&D" ( discard_D ), "=&c" ( discard_c ),
+  "+m" ( *value )
+   : "r" ( data ), "g" ( pad_len ), "0" ( value0 ),
+  "1" ( len )
+   : "eax" );
+ }
+
+extern void touch (void *, ...);
+extern void touch3 (void *, void *, void *);
+extern void touch2 (void *, void *);
+
+/**
+ * Perform big integer self-tests
+ *
+ */
+void bigint_test_exec ( void ) {
+do{
+   static const uint8_t base_raw[3] = {0};
+   static const uint8_t modulus_raw[3] = {0};
+   static const uint8_t exponent_raw[25] = {0};
+   unsigned int size =
+   bigint_required_size ( sizeof ( base_raw ) );
+   unsigned int exponent_size =
+   bigint_required_size ( sizeof ( exponent_raw ) );
+   bigint_t ( size ) base_temp;
+   bigint_t ( size ) modulus_temp;
+   bigint_t ( exponent_size ) exponent_temp;
+   size_t tmp_len = bigint_mod_exp_tmp_len ( &modulus_temp );
+
+
+   touch ( &base_temp );
+ 

[gcc r15-7729] [PR116336][LRA]: Add a test

2025-02-27 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:e59dd301aea9c8d5a5d04e808c87d591a26e85bb

commit r15-7729-ge59dd301aea9c8d5a5d04e808c87d591a26e85bb
Author: Vladimir N. Makarov 
Date:   Thu Feb 27 13:05:44 2025 -0500

[PR116336][LRA]: Add a test

  The patch for PR116234 also solves the given PR116336.  So this patch adds only the test
case, which is very different from the PR116234 one.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116336
* gcc.dg/pr116336.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/pr116336.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/pr116336.c b/gcc/testsuite/gcc.dg/pr116336.c
new file mode 100644
index ..3e4f3d329c10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116336.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -gno-statement-frontiers -fcompare-debug" } */
+
+double duk_js_execute_bytecode_duk__tv_0_0;
+double duk_double_div();
+void duk_handle_call_unprotected();
+void duk_js_execute_bytecode() {
+  double du_0;
+  long opcode_shifted;
+  switch (opcode_shifted)
+  case 2:
+du_0 = duk_double_div();
+  duk_js_execute_bytecode_duk__tv_0_0 = du_0;
+  duk_handle_call_unprotected();
+  duk_js_execute_bytecode();
+}


[gcc r15-8449] [PR119270][IRA]: Ignore equiv init insns for cost calculation for invariants only

2025-03-19 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:6c443e128802bd93158a3db7c4edf5fc1fc76c8d

commit r15-8449-g6c443e128802bd93158a3db7c4edf5fc1fc76c8d
Author: Vladimir N. Makarov 
Date:   Wed Mar 19 16:06:41 2025 -0400

[PR119270][IRA]: Ignore equiv init insns for cost calculation for 
invariants only

My previous patch for PR114991 contains code that ignores equiv init insns
when increasing the cost of using the equivalence.  Although common sense says
it is the right thing to do, this results in more aggressive usage of
memory equivalences and a significant performance degradation of SPEC2017
cactuBSSN.  This patch restores the previous cost calculation for all
equivalences except for invariant ones.

gcc/ChangeLog:

PR target/119270
* ira-costs.cc (calculate_equiv_gains): Ignore equiv init insns
only for invariants.

Diff:
---
 gcc/ira-costs.cc | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index 70cba942a7b0..faf706ca5ddc 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -1926,12 +1926,15 @@ calculate_equiv_gains (void)
  || !bitmap_bit_p (&equiv_pseudos, regno))
continue;
 
- rtx_insn_list *x;
- for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
-   if (insn == x->insn ())
- break;
- if (x != NULL)
-   continue; /* skip equiv init insn */
+ if (ira_reg_equiv[regno].invariant != NULL)
+   {
+ rtx_insn_list *x = ira_reg_equiv[regno].init_insns;
+ for (; x != NULL; x = x->next ())
+   if (insn == x->insn ())
+ break;
+ if (x != NULL)
+   continue; /* skip equiv init insn for invariant */
+   }
 
  rtx subst = ira_reg_equiv[regno].memory;


[gcc r15-8237] [PR119285][IRA]: Use an additional way of reg equiv invariant substitution correctness

2025-03-17 Thread Vladimir Makarov via Gcc-cvs
https://gcc.gnu.org/g:8e0e17677afc1a93aa31b8b83849848b7bb52b9b

commit r15-8237-g8e0e17677afc1a93aa31b8b83849848b7bb52b9b
Author: Vladimir N. Makarov 
Date:   Mon Mar 17 15:21:46 2025 -0400

[PR119285][IRA]: Use an additional way of reg equiv invariant substitution 
correctness

The patch for PR114991 resulted in a 5% decrease of SPEC2017 lbm performance
on Zen2 and Zen4.  For one RTL insn of lbm, LRA with the PR114991 patch
cannot confirm that the equivalence insertion will create a valid RTL
insn.  As a result, the pseudo equiv was assumed costly, the
pseudo was assigned to a hard reg (caller saved, as the pseudo lives
through calls), and some other pseudos did not get hard regs as they did
before the PR114991 patch.  The insn in question is `pseudo1 = pseudo2 +
pseudo3` where pseudo2 has equiv `hard_reg + const`.  The old code
recognized the insn after equiv substitution as LEA.  The new code
failed.  This patch uses two ways of checking equiv substitution
correctness, the old one and the new one (mostly for memory addresses,
where the old code fails to confirm the substitution correctness).  So
this patch fixes the lbm performance degradation and actually makes GCC
generate the same code as it did before the PR114991 patch.

gcc/ChangeLog:

PR rtl-optimization/119285
* ira-costs.cc (equiv_can_be_consumed_p): Use 2 ways for
recognizing a valid insn after equiv insertion.

Diff:
---
 gcc/ira-costs.cc | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index b568c7d03267..70cba942a7b0 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -1794,29 +1794,28 @@ validate_autoinc_and_mem_addr_p (rtx x)
 static bool
 equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn, bool invariant_p)
 {
-  if (invariant_p)
+  validate_replace_src_group (regno_reg_rtx[regno], to, insn);
+  /* We can change register to equivalent memory in autoinc rtl.  Some code
+ including verify_changes assumes that autoinc contains only a register.
+ So check this first.  */
+  bool res = validate_autoinc_and_mem_addr_p (PATTERN (insn));
+  if (res)
+res = verify_changes (0);
+  cancel_changes (0);
+  if (!res && invariant_p)
 {
-  /* We use more expensive code for the invariant because we need to
+  /* Here we use more expensive code for the invariant because we need to
 simplify the result insn as the invariant can be arithmetic rtx
-inserted into another arithmetic rtx.  */
+inserted into another arithmetic rtx, e.g. into memory address.  */
   rtx pat = PATTERN (insn);
   int code = INSN_CODE (insn);
   PATTERN (insn) = copy_rtx (pat);
   PATTERN (insn)
= simplify_replace_rtx (PATTERN (insn), regno_reg_rtx[regno], to);
-  bool res = !insn_invalid_p (insn, false);
+  res = !insn_invalid_p (insn, false);
   PATTERN (insn) = pat;
   INSN_CODE (insn) = code;
-  return res;
 }
-  validate_replace_src_group (regno_reg_rtx[regno], to, insn);
-  /* We can change register to equivalent memory in autoinc rtl.  Some code
- including verify_changes assumes that autoinc contains only a register.
- So check this first.  */
-  bool res = validate_autoinc_and_mem_addr_p (PATTERN (insn));
-  if (res)
-res = verify_changes (0);
-  cancel_changes (0);
   return res;
 }