On Thu, 14 Mar 2013, Richard Biener wrote: > > This extracts pieces from the already posted patch series that are > most worthwhile and applicable for backporting to both 4.8 and 4.7. > It also re-implements the limiting of the maximum number of memory > references to consider for LIMs dependence analysis. This limiting > is now done per loop-nest and disables optimizing outer loops > only. The limiting requires backporting introduction of the > shared unalalyzable mem-ref - it works by marking that as stored > in loops we do not want to compute dependences for - which makes > dependence computation for mems in those loops linear, as that > mem-ref, which conveniently has ID 0, is tested first. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > The current limit of 1000 datarefs is quite low (well, for LIMs > purposes, that is), and I only bothered to care about -O1 for > backports (no caching of the affine combination). With the > limit in place and at -O1 LIM now takes > > tree loop invariant motion: 0.55 ( 1%) usr > > for the testcase in PR39326. Four patches in total, we might > consider not backporting the limiting, without it this > insane testcase has, at ~2GB memory usage (peak determined by IRA) > > tree loop invariant motion: 533.30 (77%) usr > > but avoids running into the DSE / combine issue (and thus stays > managable overall at -O1). With limiting it requires -fno-dse > to not blow up (>5GB of memory use).
Note that the limiting patch (below) causes code-generation differences because it collects memory-references in a different order and store-motion applies its transform in order of mem-ref IDs (different order of loads / stores and different decl UIDs). The different ordering results in quite a big speedup because bitmaps have a more regular form (maybe only for this testcase though). Richard. > 2013-03-14 Richard Biener <rguent...@suse.de> > > PR tree-optimization/39326 > * tree-ssa-loop-im.c: Include diagnostic-core.h. > (mark_ref_stored): Optimize. > (gather_mem_refs_stmt): Also set all_refs_stored_bit if stored. > (create_vop_ref_mapping_loop, create_vop_ref_mapping): Remove > and fold functionality into ... > (gather_mem_refs_in_loops): ... this. Iterate over loops, > counting memory references and punting when more than > --param loop-max-datarefs-for-datadeps. > (analyze_memory_references): Adjust. > > Index: trunk/gcc/tree-ssa-loop-im.c > =================================================================== > *** trunk.orig/gcc/tree-ssa-loop-im.c 2013-03-14 12:52:37.000000000 +0100 > --- trunk/gcc/tree-ssa-loop-im.c 2013-03-14 14:23:47.533164359 +0100 > *************** along with GCC; see the file COPYING3. > *** 20,25 **** > --- 20,26 ---- > #include "config.h" > #include "system.h" > #include "coretypes.h" > + #include "diagnostic-core.h" > #include "tm.h" > #include "tree.h" > #include "tm_p.h" > *************** record_mem_ref_loc (mem_ref_p ref, struc > *** 1551,1561 **** > static void > mark_ref_stored (mem_ref_p ref, struct loop *loop) > { > ! for (; > ! loop != current_loops->tree_root > ! && !bitmap_bit_p (ref->stored, loop->num); > ! loop = loop_outer (loop)) > ! bitmap_set_bit (ref->stored, loop->num); > } > > /* Gathers memory references in statement STMT in LOOP, storing the > --- 1552,1560 ---- > static void > mark_ref_stored (mem_ref_p ref, struct loop *loop) > { > ! while (loop != current_loops->tree_root > ! && bitmap_set_bit (ref->stored, loop->num)) > ! loop = loop_outer (loop); > } > > /* Gathers memory references in statement STMT in LOOP, storing the > *************** gather_mem_refs_stmt (struct loop *loop, > *** 1618,1624 **** > } > bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id); > if (is_stored) > ! mark_ref_stored (ref, loop); > return; > } > > --- 1617,1627 ---- > } > bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id); > if (is_stored) > ! { > ! bitmap_set_bit (memory_accesses.all_refs_stored_in_loop[loop->num], > ! ref->id); > ! mark_ref_stored (ref, loop); > ! } > return; > } > > *************** gather_mem_refs_stmt (struct loop *loop, > *** 1627,1704 **** > static void > gather_mem_refs_in_loops (void) > { > - gimple_stmt_iterator bsi; > - basic_block bb; > struct loop *loop; > loop_iterator li; > - bitmap lrefs, alrefs, alrefso; > - > - FOR_EACH_BB (bb) > - { > - loop = bb->loop_father; > - if (loop == current_loops->tree_root) > - continue; > > ! for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) > ! gather_mem_refs_stmt (loop, gsi_stmt (bsi)); > ! } > > /* Propagate the information about accessed memory references up > the loop hierarchy. */ > FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) > { > ! lrefs = memory_accesses.refs_in_loop[loop->num]; > ! alrefs = memory_accesses.all_refs_in_loop[loop->num]; > ! bitmap_ior_into (alrefs, lrefs); > > ! if (loop_outer (loop) == current_loops->tree_root) > continue; > > ! alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num]; > ! bitmap_ior_into (alrefso, alrefs); > } > - } > - > - /* Create a mapping from virtual operands to references that touch them > - in LOOP. */ > - > - static void > - create_vop_ref_mapping_loop (struct loop *loop) > - { > - bitmap refs = memory_accesses.refs_in_loop[loop->num]; > - struct loop *sloop; > - bitmap_iterator bi; > - unsigned i; > - mem_ref_p ref; > > ! EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi) > ! { > ! ref = memory_accesses.refs_list[i]; > ! for (sloop = loop; sloop != current_loops->tree_root; > ! sloop = loop_outer (sloop)) > ! if (bitmap_bit_p (ref->stored, loop->num)) > ! { > ! bitmap refs_stored > ! = memory_accesses.all_refs_stored_in_loop[sloop->num]; > ! bitmap_set_bit (refs_stored, ref->id); > ! } > ! } > } > > - /* For each non-clobbered virtual operand and each loop, record the memory > - references in this loop that touch the operand. */ > - > - static void > - create_vop_ref_mapping (void) > - { > - loop_iterator li; > - struct loop *loop; > - > - FOR_EACH_LOOP (li, loop, 0) > - { > - create_vop_ref_mapping_loop (loop); > - } > - } > > /* Gathers information about memory accesses in the loops. */ > > --- 1630,1707 ---- > static void > gather_mem_refs_in_loops (void) > { > struct loop *loop; > loop_iterator li; > > ! unsigned *count = XCNEWVEC (unsigned, number_of_loops ()); > > /* Propagate the information about accessed memory references up > the loop hierarchy. */ > FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) > { > ! basic_block *bbs = get_loop_body (loop); > ! struct loop *outer; > ! unsigned i; > ! > ! for (i = 0; i < loop->num_nodes; ++i) > ! { > ! basic_block bb = bbs[i]; > ! gimple_stmt_iterator gsi; > ! if (bb->loop_father != loop) > ! continue; > ! for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > ! gather_mem_refs_stmt (loop, gsi_stmt (gsi)); > ! } > ! free (bbs); > > ! /* Finalize the overall numbers (including subloops) for this loop. > */ > ! count[loop->num] > ! += bitmap_count_bits (memory_accesses.refs_in_loop[loop->num]); > ! > ! /* If there are too many memory references in this loop and its > ! sub-loops such that dependence testing would blow up compile-time > ! drop a unanalyzed store into the list of memory references to > ! get to the early-out. */ > ! if ((count[loop->num] > ! > (unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) > ! && bitmap_set_bit > ! (memory_accesses.all_refs_stored_in_loop[loop->num], > ! UNANALYZABLE_MEM_ID)) > ! { > ! bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], > ! UNANALYZABLE_MEM_ID); > ! mark_ref_stored (memory_accesses.refs_list[UNANALYZABLE_MEM_ID], > ! loop); > ! if (dump_file && (dump_flags & TDF_DETAILS)) > ! fprintf (dump_file, "Too many memory references in loop %d and its " > ! "super-loops, giving up\n", loop->num); > ! warning_at (gimple_location (gsi_stmt (gsi_start_bb (loop->header))), > ! OPT_Wdisabled_optimization, > ! "-ftree-loop-im: number of memory references in loop " > ! "exceeds the --param loops-max-datarefs-for-datadeps " > ! "threshold"); > ! } > ! > ! /* Finalize the overall touched references (including subloops). */ > ! bitmap_ior_into (memory_accesses.all_refs_in_loop[loop->num], > ! memory_accesses.refs_in_loop[loop->num]); > ! > ! /* Propagate the information about accessed memory references up > ! the loop hierarchy. */ > ! outer = loop_outer (loop); > ! if (outer == current_loops->tree_root) > continue; > > ! bitmap_ior_into (memory_accesses.all_refs_in_loop[outer->num], > ! memory_accesses.all_refs_in_loop[loop->num]); > ! bitmap_ior_into (memory_accesses.all_refs_stored_in_loop[outer->num], > ! memory_accesses.all_refs_stored_in_loop[loop->num]); > ! count[outer->num] += count[loop->num]; > } > > ! free (count); > } > > > /* Gathers information about memory accesses in the loops. */ > > *************** analyze_memory_references (void) > *** 1731,1737 **** > memory_accesses.ttae_cache = NULL; > > gather_mem_refs_in_loops (); > - create_vop_ref_mapping (); > } > > /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache > in > --- 1734,1739 ---- > -- Richard Biener <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imend