commit:     8cd7b247ed5101dbf81e3bf761f15207319aa2d4
Author:     Sam James <sam <AT> gentoo <DOT> org>
AuthorDate: Mon Apr  7 07:07:29 2025 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Mon Apr  7 07:07:29 2025 +0000
URL:        https://gitweb.gentoo.org/proj/gcc-patches.git/commit/?id=8cd7b247

15.0.0: drop combine patches merged upstream

Signed-off-by: Sam James <sam <AT> gentoo.org>

 ...ow-2-2-combinations-but-with-a-tweak-PR11.patch | 204 ------------------
 ...id-split_i2i3-search-if-i2-is-unchanged-P.patch |  54 -----
 ...Optimise-distribute_links-search-PR116398.patch |  99 ---------
 ...it-insn-searchs-for-2-2-combinations-PR11.patch | 240 ---------------------
 ...implify-rtx-Fix-shortcut-for-vector-eq-ne.patch | 131 -----------
 15.0.0/gentoo/README.history                       |   5 +
 6 files changed, 5 insertions(+), 728 deletions(-)

diff --git 
a/15.0.0/gentoo/83_all_combine-Allow-2-2-combinations-but-with-a-tweak-PR11.patch
 
b/15.0.0/gentoo/83_all_combine-Allow-2-2-combinations-but-with-a-tweak-PR11.patch
deleted file mode 100644
index fd85bcd..0000000
--- 
a/15.0.0/gentoo/83_all_combine-Allow-2-2-combinations-but-with-a-tweak-PR11.patch
+++ /dev/null
@@ -1,204 +0,0 @@
-https://inbox.sourceware.org/gcc-patches/[email protected]/
-
-From de2f5e9c4ce4bb674fb0da191102838a6bdd92f6 Mon Sep 17 00:00:00 2001
-Message-ID: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-From: Richard Sandiford <[email protected]>
-Date: Fri, 4 Apr 2025 10:21:43 +0100
-Subject: [PATCH 1/4] combine: Allow 2->2 combinations, but with a tweak
- [PR116398]
-
-One of the problems in PR101523 was that, after each successful
-2->2 combination attempt, try_combine would restart combination
-attempts at i2 even if i2 hadn't changed.  This led to quadratic
-behaviour as the same failed combinations between i2 and i3 were
-tried repeatedly.
-
-The original patch for the PR dealt with that by disallowing 2->2
-combinations.  However, that led to various optimisation regressions,
-so there was interest in allowing the combinations again, at least
-until an alternative way of getting the same results is in place.
-
-This patch is a variant of Richi's in:
-
-  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101523#c53
-
-but limited to when we're combining 2 instructions.
-
-This speeds up combine by 10x on the original PR101523 testcase
-and reduces combine's memory footprint by 100x.
-
-gcc/
-       PR testsuite/116398
-       * combine.cc (try_combine): Reallow 2->2 combinations.  Detect when
-       only i3 has changed and restart from i3 in that case.
-
-gcc/testsuite/
-       * gcc.target/aarch64/popcnt-le-1.c: Account for commutativity of TST.
-       * gcc.target/aarch64/popcnt-le-3.c: Likewise AND.
-       * gcc.target/aarch64/sve/pred-not-gen-1.c: Revert previous patch.
-       * gcc.target/aarch64/sve/pred-not-gen-4.c: Likewise.
-       * gcc.target/aarch64/sve/var_stride_2.c: Likewise.
-       * gcc.target/aarch64/sve/var_stride_4.c: Likewise.
-
-Co-authored-by: Richard Biener <[email protected]>
----
- gcc/combine.cc                                     | 14 ++++----------
- gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c     |  4 ++--
- gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c     |  4 ++--
- gcc/testsuite/gcc.target/aarch64/pr100056.c        |  4 +++-
- .../gcc.target/aarch64/sve/pred-not-gen-1.c        |  4 ++--
- .../gcc.target/aarch64/sve/pred-not-gen-4.c        |  4 ++--
- .../gcc.target/aarch64/sve/var_stride_2.c          |  3 ++-
- .../gcc.target/aarch64/sve/var_stride_4.c          |  3 ++-
- 8 files changed, 19 insertions(+), 21 deletions(-)
-
-diff --git a/gcc/combine.cc b/gcc/combine.cc
-index 1b6c4e314cc9..65a87a45b3be 100644
---- a/gcc/combine.cc
-+++ b/gcc/combine.cc
-@@ -4210,16 +4210,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
-       adjust_for_new_dest (i3);
-     }
- 
--  /* If I2 didn't change, this is not a combination (but a simplification or
--     canonicalisation with context), which should not be done here.  Doing
--     it here explodes the algorithm.  Don't.  */
--  if (rtx_equal_p (newi2pat, PATTERN (i2)))
--    {
--      if (dump_file)
--      fprintf (dump_file, "i2 didn't change, not doing this\n");
--      undo_all ();
--      return 0;
--    }
-+  bool only_i3_changed = !i0 && !i1 && rtx_equal_p (newi2pat, PATTERN (i2));
- 
-   /* We now know that we can do this combination.  Merge the insns and
-      update the status of registers and LOG_LINKS.  */
-@@ -4787,6 +4778,9 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
-   combine_successes++;
-   undo_commit ();
- 
-+  if (only_i3_changed)
-+    return i3;
-+
-   rtx_insn *ret = newi2pat ? i2 : i3;
-   if (added_links_insn && DF_INSN_LUID (added_links_insn) < DF_INSN_LUID 
(ret))
-     ret = added_links_insn;
-diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
-index b4141da982c9..843fdac9fd8e 100644
---- a/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
-+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
-@@ -8,7 +8,7 @@
- /*
- ** le32:
- **    sub     w([0-9]+), w0, #1
--**    tst     w0, w\1
-+**    tst     (?:w0, w\1|w\1, w0)
- **    cset    w0, eq
- **    ret
- */
-@@ -20,7 +20,7 @@ unsigned le32 (const unsigned int a) {
- /*
- ** gt32:
- **    sub     w([0-9]+), w0, #1
--**    tst     w0, w\1
-+**    tst     (?:w0, w\1|w\1, w0)
- **    cset    w0, ne
- **    ret
- */
-diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c 
b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
-index b811e6f6e8fe..3b558e95d819 100644
---- a/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
-+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
-@@ -8,7 +8,7 @@
- /*
- ** le16:
- **    sub     w([0-9]+), w0, #1
--**    and     w([0-9]+), w0, w\1
-+**    and     w([0-9]+), (?:w0, w\1|w\1, w0)
- **    tst     w\2, 65535
- **    cset    w0, eq
- **    ret
-@@ -21,7 +21,7 @@ unsigned le16 (const unsigned short a) {
- /*
- ** gt16:
- **    sub     w([0-9]+), w0, #1
--**    and     w([0-9]+), w0, w\1
-+**    and     w([0-9]+), (?:w0, w\1|w\1, w0)
- **    tst     w\2, 65535
- **    cset    w0, ne
- **    ret
-diff --git a/gcc/testsuite/gcc.target/aarch64/pr100056.c 
b/gcc/testsuite/gcc.target/aarch64/pr100056.c
-index 0b77824da457..70499772d285 100644
---- a/gcc/testsuite/gcc.target/aarch64/pr100056.c
-+++ b/gcc/testsuite/gcc.target/aarch64/pr100056.c
-@@ -1,7 +1,9 @@
- /* PR target/100056 */
- /* { dg-do compile } */
- /* { dg-options "-O2" } */
--/* { dg-final { scan-assembler-not {\t[us]bfiz\tw[0-9]+, w[0-9]+, 11} } } */
-+/* { dg-final { scan-assembler-not {\t[us]bfiz\tw[0-9]+, w[0-9]+, 11} { xfail 
*-*-* } } } */
-+/* { dg-final { scan-assembler-times {\t[us]bfiz\tw[0-9]+, w[0-9]+, 11} 2 } } 
*/
-+/* { dg-final { scan-assembler-times {\tadd\tw[0-9]+, w[0-9]+, w[0-9]+, 
uxtb\n} 2 } } */
- 
- int
- or_shift_u8 (unsigned char i)
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
-index a7d2795ebe23..c9a8b82c48ac 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
-@@ -19,6 +19,6 @@ void f10(double * restrict z, double * restrict w, double * 
restrict x, double *
-     }
- }
- 
--/* { dg-final { scan-assembler-not {\tbic\t} { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-not {\tbic\t} } } */
-+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
- /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
z[0-9]+\.d, #0} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
-index 20cbd7550b7e..1845bd3f0f70 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
-@@ -8,6 +8,6 @@ void f13(double * restrict z, double * restrict w, double * 
restrict x, double *
-     }
- }
- 
--/* { dg-final { scan-assembler-not {\tbic\t} { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-not {\tbic\t} } } */
-+/* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
- /* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, 
z[0-9]+\.d, z[0-9]+\.d} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-index 33b9f0f197e4..b8afea70207f 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-@@ -16,7 +16,8 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
- /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
- /* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4.  */
--/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 
10\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */
- /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
- /* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
- /* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c
-index 71b826a4c1bb..d2e74f9d4175 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c
-@@ -16,7 +16,8 @@ f (TYPE *x, TYPE *y, int n, int m)
- /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
- /* Should multiply by (257-1)*4 rather than (VF-1)*4.  */
--/* { dg-final { scan-assembler-times {\t(?:lsl\tx[0-9]+, x[0-9]+, 
10|sbfiz\tx[0-9]+, x[0-9]+, 10, 32)\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x2, 10, 32\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tsbfiz\tx[0-9]+, x3, 10, 32\n} 1 } } */
- /* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
- /* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */
- /* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */
-
-base-commit: d25728c98682c058bfda79333c94b0a8cf2a3f49
--- 
-2.49.0
-

diff --git 
a/15.0.0/gentoo/84_all_combine-Avoid-split_i2i3-search-if-i2-is-unchanged-P.patch
 
b/15.0.0/gentoo/84_all_combine-Avoid-split_i2i3-search-if-i2-is-unchanged-P.patch
deleted file mode 100644
index 19d4917..0000000
--- 
a/15.0.0/gentoo/84_all_combine-Avoid-split_i2i3-search-if-i2-is-unchanged-P.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-https://inbox.sourceware.org/gcc-patches/[email protected]/
-
-From fac8faddc3fca52b6d51c0d1a57323012a1fe4e9 Mon Sep 17 00:00:00 2001
-Message-ID: 
<fac8faddc3fca52b6d51c0d1a57323012a1fe4e9.1743793443.git....@gentoo.org>
-In-Reply-To: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-References: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-From: Richard Sandiford <[email protected]>
-Date: Fri, 4 Apr 2025 10:22:12 +0100
-Subject: [PATCH 2/4] combine: Avoid split_i2i3 search if i2 is unchanged
- [PR116398]
-
-When combining a single-set i2 into a multi-set i3, combine
-first tries to match the new multi-set in-place.  If that fails,
-combine considers splitting the multi-set so that one set goes in
-i2 and the other set stays in i3.  That moves a destination from i3
-to i2 and so combine needs to update any associated log link for that
-destination to point to i2 rather than i3.
-
-However, that kind of split can also occur for 2->2 combinations.
-For a 2-instruction combination in which i2 doesn't die in i3, combine
-tries a 2->1 combination by turning i3 into a parallel of the original
-i2 and the combined i3.  If that fails, combine will split the parallel
-as above, so that the first set goes in i2 and the second set goes in i3.
-But that can often leave i2 unchanged, meaning that no destinations have
-moved and so no search is necessary.
-
-gcc/
-       PR testsuite/116398
-       * combine.cc (try_combine): Shortcut the split_i2i3 handling if
-       i2 is unchanged.
----
- gcc/combine.cc | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/gcc/combine.cc b/gcc/combine.cc
-index 65a87a45b3be..e29cff7147d9 100644
---- a/gcc/combine.cc
-+++ b/gcc/combine.cc
-@@ -4212,6 +4212,12 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
- 
-   bool only_i3_changed = !i0 && !i1 && rtx_equal_p (newi2pat, PATTERN (i2));
- 
-+  /* If only i3 has changed, any split of the combined instruction just
-+     restored i2 to its original state.  No destinations moved from i3
-+     to i2.  */
-+  if (only_i3_changed)
-+    split_i2i3 = false;
-+
-   /* We now know that we can do this combination.  Merge the insns and
-      update the status of registers and LOG_LINKS.  */
- 
--- 
-2.49.0
-

diff --git 
a/15.0.0/gentoo/85_all_combine-Optimise-distribute_links-search-PR116398.patch 
b/15.0.0/gentoo/85_all_combine-Optimise-distribute_links-search-PR116398.patch
deleted file mode 100644
index f8849ba..0000000
--- 
a/15.0.0/gentoo/85_all_combine-Optimise-distribute_links-search-PR116398.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-https://inbox.sourceware.org/gcc-patches/[email protected]/
-
-From bc27a2f371602bf897ff4da1620a56eeeeebf8ff Mon Sep 17 00:00:00 2001
-Message-ID: 
<bc27a2f371602bf897ff4da1620a56eeeeebf8ff.1743793443.git....@gentoo.org>
-In-Reply-To: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-References: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-From: Richard Sandiford <[email protected]>
-Date: Fri, 4 Apr 2025 10:22:32 +0100
-Subject: [PATCH 3/4] combine: Optimise distribute_links search [PR116398]
-
-Another problem in PR101523 was that, after each successful 2->2
-combination attempt, distribute_links would search further and further
-for the next combinable use of the i2 destination.  Each search would
-start at i2 itself, making the search quadratic in the worst case.
-
-In a 2->2 combination, if i2 is unchanged, the search can start at i3
-instead of i2.  The same thing applies to i2 when distributing i2's
-links, since the only changes to earlier instructions are the deletion
-of i0 and i1.
-
-This change, combined with the previous split_i2i3 patch, gives a
-34.6% speedup in combine for the testcase in PR101523.  Combine
-goes from being 41% to 34% of compile time.
-
-gcc/
-       PR testsuite/116398
-       * combine.cc (distribute_links): Take an optional start point.
-       (try_combine): If only i3 has changed, only distribute i3's links,
-       not i2's.  Start the search for the new use from i3 rather than
-       from the definition instruction.  Likewise start the search for
-       the new use from i2 when distributing i2's links.
----
- gcc/combine.cc | 27 +++++++++++++++++++--------
- 1 file changed, 19 insertions(+), 8 deletions(-)
-
-diff --git a/gcc/combine.cc b/gcc/combine.cc
-index e29cff7147d9..e99b064c98d4 100644
---- a/gcc/combine.cc
-+++ b/gcc/combine.cc
-@@ -472,7 +472,7 @@ static void move_deaths (rtx, rtx, int, rtx_insn *, rtx *);
- static bool reg_bitfield_target_p (rtx, rtx);
- static void distribute_notes (rtx, rtx_insn *, rtx_insn *, rtx_insn *,
-                             rtx, rtx, rtx);
--static void distribute_links (struct insn_link *);
-+static void distribute_links (struct insn_link *, rtx_insn * = nullptr);
- static void mark_used_regs_combine (rtx);
- static void record_promoted_value (rtx_insn *, rtx);
- static bool unmentioned_reg_p (rtx, rtx);
-@@ -4592,10 +4592,15 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
-                           NULL_RTX, NULL_RTX, NULL_RTX);
-       }
- 
--    distribute_links (i3links);
--    distribute_links (i2links);
--    distribute_links (i1links);
--    distribute_links (i0links);
-+    if (only_i3_changed)
-+      distribute_links (i3links, i3);
-+    else
-+      {
-+      distribute_links (i3links);
-+      distribute_links (i2links, i2);
-+      distribute_links (i1links);
-+      distribute_links (i0links);
-+      }
- 
-     if (REG_P (i2dest))
-       {
-@@ -14986,10 +14991,13 @@ distribute_notes (rtx notes, rtx_insn *from_insn, 
rtx_insn *i3, rtx_insn *i2,
- 
- /* Similarly to above, distribute the LOG_LINKS that used to be present on
-    I3, I2, and I1 to new locations.  This is also called to add a link
--   pointing at I3 when I3's destination is changed.  */
-+   pointing at I3 when I3's destination is changed.
-+
-+   If START is nonnull and an insn, we know that the next location for each
-+   link is no earlier than START.  */
- 
- static void
--distribute_links (struct insn_link *links)
-+distribute_links (struct insn_link *links, rtx_insn *start)
- {
-   struct insn_link *link, *next_link;
- 
-@@ -15055,7 +15063,10 @@ distribute_links (struct insn_link *links)
-        I3 to I2.  Also note that not much searching is typically done here
-        since most links don't point very far away.  */
- 
--      for (insn = NEXT_INSN (link->insn);
-+      insn = start;
-+      if (!insn || NOTE_P (insn))
-+      insn = NEXT_INSN (link->insn);
-+      for (;
-          (insn && (this_basic_block->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
-                    || BB_HEAD (this_basic_block->next_bb) != insn));
-          insn = NEXT_INSN (insn))
--- 
-2.49.0
-

diff --git 
a/15.0.0/gentoo/86_all_combine-Limit-insn-searchs-for-2-2-combinations-PR11.patch
 
b/15.0.0/gentoo/86_all_combine-Limit-insn-searchs-for-2-2-combinations-PR11.patch
deleted file mode 100644
index b185cba..0000000
--- 
a/15.0.0/gentoo/86_all_combine-Limit-insn-searchs-for-2-2-combinations-PR11.patch
+++ /dev/null
@@ -1,240 +0,0 @@
-https://inbox.sourceware.org/gcc-patches/[email protected]/
-
-From 247ac81832821bade9fa36c350d220dca039d383 Mon Sep 17 00:00:00 2001
-Message-ID: 
<247ac81832821bade9fa36c350d220dca039d383.1743793443.git....@gentoo.org>
-In-Reply-To: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-References: 
<de2f5e9c4ce4bb674fb0da191102838a6bdd92f6.1743793443.git....@gentoo.org>
-From: Richard Sandiford <[email protected]>
-Date: Fri, 4 Apr 2025 10:23:00 +0100
-Subject: [PATCH 4/4] combine: Limit insn searchs for 2->2 combinations
- [PR116398]
-
-As noted in the previous patch, combine still takes >30% of
-compile time in the original testcase for PR101523.  The problem
-is that try_combine uses linear insn searches for some dataflow
-queries, so in the worst case, an unlimited number of 2->2
-combinations for the same i2 can lead to quadratic behaviour.
-
-This patch limits distribute_links to a certain number
-of instructions when i2 is unchanged.  As Segher said in the PR trail,
-it would make more conceptual sense to apply the limit unconditionally,
-but I thought it would be better to change as little as possible at
-this development stage.  Logically, in stage 1, the --param should
-be applied directly by distribute_links with no input from callers.
-
-As I mentioned in:
-
-  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398#c28
-
-I think it's safe to drop log links even if a use exists.  All
-processing of log links seems to handle the absence of a link
-for a particular register in a conservative way.
-
-The initial set-up errs on the side of dropping links, since for example
-create_log_links has:
-
-             /* flow.c claimed:
-
-                 We don't build a LOG_LINK for hard registers contained
-                 in ASM_OPERANDs.  If these registers get replaced,
-                 we might wind up changing the semantics of the insn,
-                 even if reload can make what appear to be valid
-                 assignments later.  */
-              if (regno < FIRST_PSEUDO_REGISTER
-                  && asm_noperands (PATTERN (use_insn)) >= 0)
-                continue;
-
-which excludes combinations by dropping log links, rather than during
-try_combine.  And:
-
-      /* If this register is being initialized using itself, and the
-         register is uninitialized in this basic block, and there are
-         no LOG_LINKS which set the register, then part of the
-         register is uninitialized.  In that case we can't assume
-         anything about the number of nonzero bits.
-
-         ??? We could do better if we checked this in
-         reg_{nonzero_bits,num_sign_bit_copies}_for_combine.  Then we
-         could avoid making assumptions about the insn which initially
-         sets the register, while still using the information in other
-         insns.  We would have to be careful to check every insn
-         involved in the combination.  */
-
-      if (insn
-          && reg_referenced_p (x, PATTERN (insn))
-          && !REGNO_REG_SET_P (DF_LR_IN (BLOCK_FOR_INSN (insn)),
-                               REGNO (x)))
-        {
-          struct insn_link *link;
-
-          FOR_EACH_LOG_LINK (link, insn)
-            if (dead_or_set_p (link->insn, x))
-              break;
-          if (!link)
-            {
-              rsp->nonzero_bits = GET_MODE_MASK (mode);
-              rsp->sign_bit_copies = 1;
-              return;
-            }
-        }
-
-treats the lack of a log link as a possible sign of uninitialised data,
-but that would be a missed optimisation rather than a correctness issue.
-
-One question is what the default --param value should be.  I went with
-Jakub's suggestion of 3000 from:
-
-  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398#c25
-
-Also, to answer Jakub's question in that comment, I tried bisecting:
-
-  int limit = atoi (getenv ("BISECT"));
-
-(so applying the limit for all calls from try_combine) with an
-abort in distribute_links if the limit caused a link to be skipped.
-The minimum BISECT value that allowed an aarch64-linux-gnu bootstrap
-to succeed with --enable-languages=all --enable-checking=yes,rtl,extra
-was 142, so much lower than the parameter value.  I realised too late
-that --enable-checking=release would probably have been a more
-interesting test.
-
-The previous patch meant that distribute_links itself is now linear
-for a given i2 definition, since each search starts at the previous
-last use, rather than at i2 itself.  This means that the limit has
-to be applied cumulatively across all searches for the same link.
-
-The patch does that by storing a counter in the insn_link structure.
-There was a 32-bit hole there on LP64 hosts.
-
-gcc/
-       PR testsuite/116398
-       * params.opt (-param=max-combine-search-insns=): New param.
-       * doc/invoke.texi: Document it.
-       * combine.cc (insn_link::insn_count): New field.
-       (alloc_insn_link): Initialize it.
-       (distribute_links): Add a limit parameter.
-       (try_combine): Use the new param to limit distribute_links
-       when only i3 has changed.
----
- gcc/combine.cc      | 21 +++++++++++++++++----
- gcc/doc/invoke.texi |  9 +++++++++
- gcc/params.opt      |  4 ++++
- 3 files changed, 30 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/combine.cc b/gcc/combine.cc
-index e99b064c98d4..5f085187cfef 100644
---- a/gcc/combine.cc
-+++ b/gcc/combine.cc
-@@ -309,6 +309,7 @@ static int *uid_insn_cost;
- struct insn_link {
-   rtx_insn *insn;
-   unsigned int regno;
-+  int insn_count;
-   struct insn_link *next;
- };
- 
-@@ -342,6 +343,7 @@ alloc_insn_link (rtx_insn *insn, unsigned int regno, 
struct insn_link *next)
-                                         sizeof (struct insn_link));
-   l->insn = insn;
-   l->regno = regno;
-+  l->insn_count = 0;
-   l->next = next;
-   return l;
- }
-@@ -472,7 +474,8 @@ static void move_deaths (rtx, rtx, int, rtx_insn *, rtx *);
- static bool reg_bitfield_target_p (rtx, rtx);
- static void distribute_notes (rtx, rtx_insn *, rtx_insn *, rtx_insn *,
-                             rtx, rtx, rtx);
--static void distribute_links (struct insn_link *, rtx_insn * = nullptr);
-+static void distribute_links (struct insn_link *, rtx_insn * = nullptr,
-+                            int limit = INT_MAX);
- static void mark_used_regs_combine (rtx);
- static void record_promoted_value (rtx_insn *, rtx);
- static bool unmentioned_reg_p (rtx, rtx);
-@@ -4593,7 +4596,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, 
rtx_insn *i0,
-       }
- 
-     if (only_i3_changed)
--      distribute_links (i3links, i3);
-+      distribute_links (i3links, i3, param_max_combine_search_insns);
-     else
-       {
-       distribute_links (i3links);
-@@ -14994,10 +14997,12 @@ distribute_notes (rtx notes, rtx_insn *from_insn, 
rtx_insn *i3, rtx_insn *i2,
-    pointing at I3 when I3's destination is changed.
- 
-    If START is nonnull and an insn, we know that the next location for each
--   link is no earlier than START.  */
-+   link is no earlier than START.  LIMIT is the maximum number of nondebug
-+   instructions that can be scanned when looking for the next use of a
-+   definition.  */
- 
- static void
--distribute_links (struct insn_link *links, rtx_insn *start)
-+distribute_links (struct insn_link *links, rtx_insn *start, int limit)
- {
-   struct insn_link *link, *next_link;
- 
-@@ -15063,9 +15068,12 @@ distribute_links (struct insn_link *links, rtx_insn 
*start)
-        I3 to I2.  Also note that not much searching is typically done here
-        since most links don't point very far away.  */
- 
-+      int count = 0;
-       insn = start;
-       if (!insn || NOTE_P (insn))
-       insn = NEXT_INSN (link->insn);
-+      else
-+      count = link->insn_count;
-       for (;
-          (insn && (this_basic_block->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
-                    || BB_HEAD (this_basic_block->next_bb) != insn));
-@@ -15086,6 +15094,11 @@ distribute_links (struct insn_link *links, rtx_insn 
*start)
-         }
-       else if (INSN_P (insn) && reg_set_p (reg, insn))
-         break;
-+      else if (count >= limit)
-+        break;
-+      else
-+        count += 1;
-+      link->insn_count = count;
- 
-       /* If we found a place to put the link, place it there unless there
-        is already a link to the same insn as LINK at that point.  */
-diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
-index 4c9af429ab08..0903cd2b7f23 100644
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -16518,6 +16518,15 @@ in combiner for a pseudo register as last known value 
of that register.
- @item max-combine-insns
- The maximum number of instructions the RTL combiner tries to combine.
- 
-+@item max-combine-search-insns
-+The maximum number of instructions that the RTL combiner searches in order
-+to find the next use of a given register definition.  If this limit is reached
-+without finding such a use, the combiner will stop trying to optimize the
-+definition.
-+
-+Currently this limit only applies after certain successful combination
-+attempts, but it could be extended to other cases in future.
-+
- @item integer-share-limit
- Small integer constants can use a shared data structure, reducing the
- compiler's memory usage and increasing its speed.  This sets the maximum
-diff --git a/gcc/params.opt b/gcc/params.opt
-index 4f4eb4d7a2a5..422d082b3557 100644
---- a/gcc/params.opt
-+++ b/gcc/params.opt
-@@ -477,6 +477,10 @@ The maximum number of instructions to consider to unroll 
in a loop on average.
- Common Joined UInteger Var(param_max_combine_insns) Init(4) IntegerRange(2, 
4) Param Optimization
- The maximum number of insns combine tries to combine.
- 
-+-param=max-combine-search-insns=
-+Common Joined UInteger Var(param_max_combine_search_insns) Init(3000) Param 
Optimization
-+The maximum number of instructions that combine searches in order to find the 
next use of a particular register.
-+
- -param=max-completely-peel-loop-nest-depth=
- Common Joined UInteger Var(param_max_unroll_iterations) Init(8) Param 
Optimization
- The maximum depth of a loop nest we completely peel.
--- 
-2.49.0
-

diff --git 
a/15.0.0/gentoo/87_all_simplify-rtx-Fix-shortcut-for-vector-eq-ne.patch 
b/15.0.0/gentoo/87_all_simplify-rtx-Fix-shortcut-for-vector-eq-ne.patch
deleted file mode 100644
index 76c3552..0000000
--- a/15.0.0/gentoo/87_all_simplify-rtx-Fix-shortcut-for-vector-eq-ne.patch
+++ /dev/null
@@ -1,131 +0,0 @@
-https://inbox.sourceware.org/gcc-patches/[email protected]/
-
-From 5cc0c07a938202390b612f4e47934c09a17cad9a Mon Sep 17 00:00:00 2001
-Message-ID: 
<5cc0c07a938202390b612f4e47934c09a17cad9a.1743817315.git....@gentoo.org>
-From: Richard Sandiford <[email protected]>
-Date: Tue, 1 Apr 2025 13:16:32 +0100
-Subject: [PATCH] simplify-rtx: Fix shortcut for vector eq/ne
-
-This patch forestalls a regression in gcc.dg/rtl/x86_64/vector_eq.c
-with the patch for PR116398.  The test wants:
-
-      (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int 0) 
(const_int 0) (const_int 0) (const_int 0)])))
-      (cinsn 5 (set (reg:V4SI <2>)
-                   (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
-
-to be folded to a vector of -1s.  One unusual thing about the fold
-is that the <1> in the second insn is uninitialised; it looks like
-it should be replaced by <0>, or that there should be an insn 4 that
-copies <0> to <1>.
-
-As it stands, the test relies on init-regs to insert a zero
-initialisation of <1>.  This happens after all the cse/pre/fwprop
-stuff, with only dce passes between init-regs and combine.
-Combine therefore sees:
-
-(insn 3 2 8 2 (set (reg:V4SI 98)
-        (const_vector:V4SI [
-                (const_int 0 [0]) repeated x4
-            ])) 2403 {movv4si_internal}
-     (nil))
-(insn 8 3 9 2 (clobber (reg:V4SI 99)) -1
-     (nil))
-(insn 9 8 5 2 (set (reg:V4SI 99)
-        (const_vector:V4SI [
-                (const_int 0 [0]) repeated x4
-            ])) -1
-     (nil))
-(insn 5 9 7 2 (set (reg:V4SI 100)
-        (eq:V4SI (reg:V4SI 98)
-            (reg:V4SI 99))) 7874 {*sse2_eqv4si3}
-     (expr_list:REG_DEAD (reg:V4SI 99)
-        (expr_list:REG_DEAD (reg:V4SI 98)
-            (expr_list:REG_EQUAL (eq:V4SI (const_vector:V4SI [
-                            (const_int 0 [0]) repeated x4
-                        ])
-                    (reg:V4SI 99))
-                (nil)))))
-
-It looks like the test should then pass through a 3, 9 -> 5 combination,
-so that we get an (eq ...) between two zeros and fold it to a vector
-of -1s.  But although the combination is attempted, the fold doesn't
-happen.  Instead, combine is left to match the unsimplified (eq ...)
-between two zeros, which rightly fails.  The test only passes because
-late_combine2 happens to try simplifying an (eq ...) between reg X and
-reg X, which does fold to a vector of -1s.
-
-The different handling of registers and constants is due to this
-code in simplify_const_relational_operation:
-
-  if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx
-      && (code == EQ || code == NE)
-      && ! ((REG_P (op0) || CONST_INT_P (trueop0))
-           && (REG_P (op1) || CONST_INT_P (trueop1)))
-      && (tem = simplify_binary_operation (MINUS, mode, op0, op1)) != 0
-      /* We cannot do this if tem is a nonzero address.  */
-      && ! nonzero_address_p (tem))
-    return simplify_const_relational_operation (signed_condition (code),
-                                               mode, tem, const0_rtx);
-
-INTEGRAL_MODE_P matches vector integer modes, but everything else
-about the condition is written for scalar integers only.  Thus if
-trueop0 and trueop1 are equal vector constants, we'll bypass all
-the exclusions and try simplifying a subtraction.  This will succeed,
-giving a vector of zeros.  The recursive call will then try to simplify
-a comparison between the vector of zeros and const0_rtx, which isn't
-well-formed.  Luckily or unluckily, the ill-formedness doesn't trigger
-an ICE, but it does prevent any simplification from happening.
-
-The least-effort fix would be to replace INTEGRAL_MODE_P with
-SCALAR_INT_MODE_P.  But the fold does make conceptual sense for
-vectors too, so it seemed better to keep the INTEGRAL_MODE_P and
-generalise the rest of the condition to match.
-
-Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?
-
-I'm hoping to post the actual patch for PR116398 later today.
-
-Richard
-
-gcc/
-       * simplify-rtx.cc (simplify_const_relational_operation): Generalize
-       the constant checks in the fold-via-minus path to match the
-       INTEGRAL_MODE_P condition.
----
- gcc/simplify-rtx.cc | 13 +++++++++----
- 1 file changed, 9 insertions(+), 4 deletions(-)
-
-diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
-index fe007bc7d96a..6f969effdf99 100644
---- a/gcc/simplify-rtx.cc
-+++ b/gcc/simplify-rtx.cc
-@@ -6657,15 +6657,20 @@ simplify_const_relational_operation (enum rtx_code 
code,
-      we do not know the signedness of the operation on either the left or
-      the right hand side of the comparison.  */
- 
--  if (INTEGRAL_MODE_P (mode) && trueop1 != const0_rtx
-+  if (INTEGRAL_MODE_P (mode)
-+      && trueop1 != CONST0_RTX (mode)
-       && (code == EQ || code == NE)
--      && ! ((REG_P (op0) || CONST_INT_P (trueop0))
--          && (REG_P (op1) || CONST_INT_P (trueop1)))
-+      && ! ((REG_P (op0)
-+           || CONST_SCALAR_INT_P (trueop0)
-+           || CONST_VECTOR_P (trueop0))
-+          && (REG_P (op1)
-+              || CONST_SCALAR_INT_P (trueop1)
-+              || CONST_VECTOR_P (trueop1)))
-       && (tem = simplify_binary_operation (MINUS, mode, op0, op1)) != 0
-       /* We cannot do this if tem is a nonzero address.  */
-       && ! nonzero_address_p (tem))
-     return simplify_const_relational_operation (signed_condition (code),
--                                              mode, tem, const0_rtx);
-+                                              mode, tem, CONST0_RTX (mode));
- 
-   if (! HONOR_NANS (mode) && code == ORDERED)
-     return const_true_rtx;
-
-base-commit: 2f62e66e1444d858e937faa7147f6cd06296b33b
--- 
-2.49.0
-

diff --git a/15.0.0/gentoo/README.history b/15.0.0/gentoo/README.history
index 20d64c4..51cd6a7 100644
--- a/15.0.0/gentoo/README.history
+++ b/15.0.0/gentoo/README.history
@@ -1,5 +1,10 @@
 54     ????
 
+       - 83_all_combine-Allow-2-2-combinations-but-with-a-tweak-PR11.patch
+       - 84_all_combine-Avoid-split_i2i3-search-if-i2-is-unchanged-P.patch
+       - 85_all_combine-Optimise-distribute_links-search-PR116398.patch
+       - 86_all_combine-Limit-insn-searchs-for-2-2-combinations-PR11.patch
+       - 87_all_simplify-rtx-Fix-shortcut-for-vector-eq-ne.patch
        U 88_all_PR119614.patch
 
 53     7 April 2025


Reply via email to