https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398
--- Comment #12 from rguenther at suse dot de <rguenther at suse dot de> --- On Fri, 14 Mar 2025, jakub at gcc dot gnu.org wrote: > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116398 > > --- Comment #10 from Jakub Jelinek <jakub at gcc dot gnu.org> --- > So, I've made an experiment > --- gcc/combine.cc.jj 2025-03-04 09:45:50.620584561 +0100 > +++ gcc/combine.cc 2025-03-14 10:57:29.164681115 +0100 > @@ -4204,7 +4204,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, > /* If I2 didn't change, this is not a combination (but a simplification or > canonicalisation with context), which should not be done here. Doing > it here explodes the algorithm. Don't. */ > - if (rtx_equal_p (newi2pat, PATTERN (i2))) > + if (rtx_equal_p (newi2pat, PATTERN (i2)) > + && DF_INSN_LUID (i3) - DF_INSN_LUID (i2) >= (getenv ("LUIDDIFF") ? atoi > (getenv ("LUIDDIFF")) : 0)) > { > if (dump_file) > fprintf (dump_file, "i2 didn't change, not doing this\n"); > On the #c0 testcase from this PR, LUIDDIFF=0 and LUIDDIFF=1 are the regression > case, LUIDDIFF=2 and above are the GCC 14 code. 
> On the > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101523#c10 > testcase for s390x-linux I get (x86_64-linux -> s390x-linux cross, built with > -O2 and the above hack): > $ for i in 0 10 100 200 400 600 800 1000 1200 10000; do echo LUIDDIFF=$i time > ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 -march=z196 > -fpreprocessed -w; LUIDDIFF=$i time ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w; done > LUIDDIFF=0 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 13.77user 0.13system 0:13.97elapsed 99%CPU (0avgtext+0avgdata > 320320maxresident)k > 0inputs+776outputs (0major+105819minor)pagefaults 0swaps > LUIDDIFF=10 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 13.88user 0.12system 0:14.05elapsed 99%CPU (0avgtext+0avgdata > 319656maxresident)k > 0inputs+784outputs (0major+106128minor)pagefaults 0swaps > LUIDDIFF=100 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 14.22user 0.15system 0:14.44elapsed 99%CPU (0avgtext+0avgdata > 322476maxresident)k > 0inputs+800outputs (0major+106693minor)pagefaults 0swaps > LUIDDIFF=200 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 14.05user 0.16system 0:14.25elapsed 99%CPU (0avgtext+0avgdata > 315804maxresident)k > 0inputs+816outputs (0major+109184minor)pagefaults 0swaps > LUIDDIFF=400 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 15.44user 0.15system 0:15.66elapsed 99%CPU (0avgtext+0avgdata > 360208maxresident)k > 0inputs+816outputs (0major+120533minor)pagefaults 0swaps > LUIDDIFF=600 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 17.47user 0.18system 0:17.70elapsed 99%CPU (0avgtext+0avgdata > 412556maxresident)k > 0inputs+816outputs 
(0major+133990minor)pagefaults 0swaps > LUIDDIFF=800 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii -mlong-double-128 > -march=z196 -fpreprocessed -w > 18.75user 0.24system 0:19.07elapsed 99%CPU (0avgtext+0avgdata > 498380maxresident)k > 0inputs+832outputs (0major+156165minor)pagefaults 0swaps > LUIDDIFF=1000 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 > -march=z196 -fpreprocessed -w > 21.60user 0.26system 0:21.92elapsed 99%CPU (0avgtext+0avgdata > 599660maxresident)k > 0inputs+840outputs (0major+181821minor)pagefaults 0swaps > LUIDDIFF=1200 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 > -march=z196 -fpreprocessed -w > 23.00user 0.27system 0:23.36elapsed 99%CPU (0avgtext+0avgdata > 670684maxresident)k > 0inputs+824outputs (0major+199625minor)pagefaults 0swaps > LUIDDIFF=10000 time ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w > 149.81user 3.26system 2:33.68elapsed 99%CPU (0avgtext+0avgdata > 7216920maxresident)k > 0inputs+912outputs (0major+1844663minor)pagefaults 0swaps > $ rm pr101523.ii.*; LUIDDIFF=0 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 22982 > merges: 21955 > extras: 5419 > successes: 115 > two-insn combine: 113 > three-insn combine: 2 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=10 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 
-fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 23474 > merges: 22422 > extras: 5587 > successes: 167 > two-insn combine: 166 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=100 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 76272 > merges: 74913 > extras: 21116 > successes: 801 > two-insn combine: 800 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=200 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: 
%d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 225950 > merges: 223585 > extras: 60074 > successes: 1457 > two-insn combine: 1456 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=400 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 562238 > merges: 558672 > extras: 142181 > successes: 2223 > two-insn combine: 2222 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=600 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 949334 > merges: 943646 > extras: 232455 > successes: 2720 > two-insn combine: 2719 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; 
LUIDDIFF=800 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 1584155 > merges: 1576840 > extras: 375785 > successes: 3305 > two-insn combine: 3304 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=1000 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 2302888 > merges: 2294229 > extras: 536374 > successes: 3836 > two-insn combine: 3835 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=1200 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note 
combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 2805076 > merges: 2795853 > extras: 648411 > successes: 4138 > two-insn combine: 4137 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > $ rm pr101523.ii.*; LUIDDIFF=10000 ./cc1plus -quiet -nostdinc -O2 pr101523.ii > -mlong-double-128 -march=z196 -fpreprocessed -w -fdump-rtl-combine-stats; awk > '/^attempts:/{attempts+=$2}/^merges:/{merges+=$2}/^extras:/{extras+=$2}/^successes/{successes+=$2}/^two-insn > combine:/{two+=$3}/^three-insn combine:/{three+=$3}/^four-insn > combine:/{four+=$3}/^insn-with-note combine/{note+=$3}END{printf "attempts: > %d\nmerges: %d\nextras: %d\nsuccesses: %d\ntwo-insn combine: %d\nthree-insn > combine: %d\nfour-insn combine: %d\ninsn-with-note combine: %d\n", attempts, > merges, extras, successes, two, three, four, note}' pr101523.ii.*combine* > attempts: 31960714 > merges: 31936996 > extras: 7194023 > successes: 8924 > two-insn combine: 8923 > three-insn combine: 1 > four-insn combine: 0 > insn-with-note combine: 0 > > So, I'd think punting only if the DF_INSN_LUID difference is >= something in > between 600 and 1200 is IMHO reasonable; say, for 1000 the compiler will need > less than twice the memory of vanilla trunk and take around 1.5x longer to > compile, but I bet (I haven't done full bootstraps/regtests with statistics > gathering) that most of the real-world cases on most arches are within 1000 > insns. > > But, do DEBUG_INSNs have their DF_INSN_LUID as well, so would that > DF_INSN_LUID > difference cause -fcompare-debug issues? Yes, it would :/