https://gcc.gnu.org/g:4b9e68a6f3b22800a7f12b58ef6b25e3b339bb3c

commit r15-628-g4b9e68a6f3b22800a7f12b58ef6b25e3b339bb3c
Author: Alexander Monakov <amona...@ispras.ru>
Date:   Wed May 15 16:23:17 2024 +0300

    tree-into-ssa: speed up sorting in prune_unused_phi_nodes [PR114480]
    
    In PR 114480 we are hitting a case where tree-into-ssa scales
    quadratically due to prune_unused_phi_nodes doing O(N log N)
    work for N basic blocks, for each variable individually.
    Sorting the 'defs' array is especially costly.
    
    It is possible to assist gcc_qsort by laying out dfs_out entries
    in the reverse order in the 'defs' array, starting from its tail.
    This is not always a win (in fact, it flips most of the 7-element qsorts
    in this testcase from 9 comparisons, the best case, to 15, the worst case),
    but overall it helps on the testcase and on the libstdc++ build.
    On the testcase we go from 1.28e9 comparator invocations to 1.05e9,
    on libstdc++ from 2.91e6 to 2.84e6.
    
    gcc/ChangeLog:
    
            PR c++/114480
            * tree-into-ssa.cc (prune_unused_phi_nodes): Add dfs_out entries
            to the 'defs' array in the reverse order.

Diff:
---
 gcc/tree-into-ssa.cc | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc
index 3732c269ca3d..5b367c358125 100644
--- a/gcc/tree-into-ssa.cc
+++ b/gcc/tree-into-ssa.cc
@@ -805,21 +805,22 @@ prune_unused_phi_nodes (bitmap phis, bitmap kills, bitmap uses)
      locate the nearest dominating def in logarithmic time by binary search.*/
   bitmap_ior (to_remove, kills, phis);
   n_defs = bitmap_count_bits (to_remove);
-  defs = XNEWVEC (struct dom_dfsnum, 2 * n_defs + 1);
+  adef = 2 * n_defs + 1;
+  defs = XNEWVEC (struct dom_dfsnum, adef);
   defs[0].bb_index = 1;
   defs[0].dfs_num = 0;
-  adef = 1;
+  struct dom_dfsnum *head = defs + 1, *tail = defs + adef;
   EXECUTE_IF_SET_IN_BITMAP (to_remove, 0, i, bi)
     {
       def_bb = BASIC_BLOCK_FOR_FN (cfun, i);
-      defs[adef].bb_index = i;
-      defs[adef].dfs_num = bb_dom_dfs_in (CDI_DOMINATORS, def_bb);
-      defs[adef + 1].bb_index = i;
-      defs[adef + 1].dfs_num = bb_dom_dfs_out (CDI_DOMINATORS, def_bb);
-      adef += 2;
+      head->bb_index = i;
+      head->dfs_num = bb_dom_dfs_in (CDI_DOMINATORS, def_bb);
+      head++, tail--;
+      tail->bb_index = i;
+      tail->dfs_num = bb_dom_dfs_out (CDI_DOMINATORS, def_bb);
     }
+  gcc_checking_assert (head == tail);
   BITMAP_FREE (to_remove);
-  gcc_assert (adef == 2 * n_defs + 1);
   qsort (defs, adef, sizeof (struct dom_dfsnum), cmp_dfsnum);
   gcc_assert (defs[0].bb_index == 1);

Reply via email to