This re-organizes builtin detection and generation properly into
an analysis and transform phase (noting that when we fail late
we will generate wrong code at the moment - eventually non-fatal,
but at least it will have duplicate work in the left-over loop).

With this in place adding more kinds of builtins to detect should
not be any rocket science anymore.

Bootstrap and regtest pending on x86_64-unknown-linux-gnu.

Richard.

2012-05-31  Richard Guenther  <rguent...@suse.de>

        * tree-loop-distribution.c (enum partition_kind): New enum.
        (struct partition_s): Add kind and main_stmt members.
        (partition_alloc): Initialize kind to PKIND_NORMAL.
        (partition_builtin_p): New function.
        (copy_loop_before): Remove failure path and assert instead.
        (generate_loops_for_partition): Likewise.
        (generate_memset_zero): Fold into ...
        (generate_memset_builtin): ... this.
        (classify_partition): New function with code from
        can_generate_builtin and generate_builtin.
        (generate_builtin): Remove.
        (can_generate_builtin): Likewise.
        (fuse_partitions_with_similar_memory_accesses): Call
        partition_builtin_p instead of can_generate_builtin.
        (rdg_build_partitions): Do not call
        fuse_partitions_with_similar_memory_accesses here...
        (ldist_gen): ... but here after classifying all partitions.
        Remove failure path of generate_code_for_partition.
        (generate_code_for_partition): Generate code according
        to partition classification.

Index: trunk/gcc/tree-loop-distribution.c
===================================================================
*** trunk.orig/gcc/tree-loop-distribution.c     2012-05-31 15:58:12.000000000 
+0200
--- trunk/gcc/tree-loop-distribution.c  2012-05-31 16:26:17.255459534 +0200
*************** along with GCC; see the file COPYING3.
*** 52,60 ****
--- 52,65 ----
  #include "tree-scalar-evolution.h"
  #include "tree-pass.h"
  
+ enum partition_kind { PKIND_NORMAL, PKIND_MEMSET };
+ 
  typedef struct partition_s
  {
    bitmap stmts;
+   enum partition_kind kind;
+   /* Main statement a kind != PKIND_NORMAL partition is about.  */
+   gimple main_stmt;
  } *partition_t;
  
  DEF_VEC_P (partition_t);
*************** partition_alloc (bitmap stmts)
*** 67,72 ****
--- 72,78 ----
  {
    partition_t partition = XCNEW (struct partition_s);
    partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
+   partition->kind = PKIND_NORMAL;
    return partition;
  }
  
*************** partition_free (partition_t partition)
*** 79,84 ****
--- 85,97 ----
    free (partition);
  }
  
+ /* Returns true if the partition can be generated as a builtin.  */
+ 
+ static bool
+ partition_builtin_p (partition_t partition)
+ {
+   return partition->kind != PKIND_NORMAL;
+ }
  
  /* If bit I is not set, it means that this node represents an
     operation that has already been performed, and that should not be
*************** copy_loop_before (struct loop *loop)
*** 183,198 ****
    struct loop *res;
    edge preheader = loop_preheader_edge (loop);
  
-   if (!single_exit (loop))
-     return NULL;
- 
    initialize_original_copy_tables ();
    res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
    free_original_copy_tables ();
  
-   if (!res)
-     return NULL;
- 
    update_phis_for_loop_copy (loop, res);
    rename_variables_in_loop (res);
  
--- 196,206 ----
    struct loop *res;
    edge preheader = loop_preheader_edge (loop);
  
    initialize_original_copy_tables ();
    res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader);
+   gcc_assert (res != NULL);
    free_original_copy_tables ();
  
    update_phis_for_loop_copy (loop, res);
    rename_variables_in_loop (res);
  
*************** create_bb_after_loop (struct loop *loop)
*** 216,225 ****
     copied when COPY_P is true.  All the statements not flagged in the
     PARTITION bitmap are removed from the loop or from its copy.  The
     statements are indexed in sequence inside a basic block, and the
!    basic blocks of a loop are taken in dom order.  Returns true when
!    the code gen succeeded. */
  
! static bool
  generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
  {
--- 224,232 ----
     copied when COPY_P is true.  All the statements not flagged in the
     PARTITION bitmap are removed from the loop or from its copy.  The
     statements are indexed in sequence inside a basic block, and the
!    basic blocks of a loop are taken in dom order.  */
  
! static void
  generate_loops_for_partition (struct loop *loop, partition_t partition,
                              bool copy_p)
  {
*************** generate_loops_for_partition (struct loo
*** 230,242 ****
    if (copy_p)
      {
        loop = copy_loop_before (loop);
        create_preheader (loop, CP_SIMPLE_PREHEADERS);
        create_bb_after_loop (loop);
      }
  
-   if (loop == NULL)
-     return false;
- 
    /* Remove stmts not in the PARTITION bitmap.  The order in which we
       visit the phi nodes and the statements is exactly as in
       stmts_from_loop.  */
--- 237,247 ----
    if (copy_p)
      {
        loop = copy_loop_before (loop);
+       gcc_assert (loop != NULL);
        create_preheader (loop, CP_SIMPLE_PREHEADERS);
        create_bb_after_loop (loop);
      }
  
    /* Remove stmts not in the PARTITION bitmap.  The order in which we
       visit the phi nodes and the statements is exactly as in
       stmts_from_loop.  */
*************** generate_loops_for_partition (struct loo
*** 293,299 ****
      }
  
    free (bbs);
-   return true;
  }
  
  /* Build the size argument for a memset call.  */
--- 298,303 ----
*************** build_size_arg_loc (location_t loc, tree
*** 313,331 ****
    return x;
  }
  
! /* Generate a call to memset.  Return true when the operation succeeded.  */
  
  static void
! generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
!                     gimple_stmt_iterator bsi)
  {
!   tree addr_base, nb_bytes;
!   bool res = false;
    gimple_seq stmt_list = NULL, stmts;
-   gimple fn_call;
-   tree mem, fn;
    struct data_reference *dr = XCNEW (struct data_reference);
!   location_t loc = gimple_location (stmt);
  
    DR_STMT (dr) = stmt;
    DR_REF (dr) = op0;
--- 317,345 ----
    return x;
  }
  
! /* Generate a call to memset for PARTITION in LOOP.  */
  
  static void
! generate_memset_builtin (struct loop *loop, partition_t partition)
  {
!   gimple_stmt_iterator gsi;
!   gimple stmt, fn_call;
!   tree op0, nb_iter, mem, fn, addr_base, nb_bytes;
    gimple_seq stmt_list = NULL, stmts;
    struct data_reference *dr = XCNEW (struct data_reference);
!   location_t loc;
!   bool res;
! 
!   stmt = partition->main_stmt;
!   loc = gimple_location (stmt);
!   op0 = gimple_assign_lhs (stmt);
!   if (gimple_bb (stmt) == loop->latch)
!     nb_iter = number_of_latch_executions (loop);
!   else
!     nb_iter = number_of_exit_cond_executions (loop);
! 
!   /* The new statements will be placed before LOOP.  */
!   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
  
    DR_STMT (dr) = stmt;
    DR_REF (dr) = op0;
*************** generate_memset_zero (gimple stmt, tree
*** 353,359 ****
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
!   gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "generated memset zero\n");
--- 367,373 ----
    fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
    fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
    gimple_seq_add_stmt (&stmt_list, fn_call);
!   gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "generated memset zero\n");
*************** generate_memset_zero (gimple stmt, tree
*** 361,473 ****
    free_data_ref (dr);
  }
  
! /* Tries to generate a builtin function for the instructions of LOOP
!    pointed to by the bits set in PARTITION.  Returns true when the
!    operation succeeded.  */
  
! static bool
! generate_builtin (struct loop *loop, partition_t partition, bool copy_p)
  {
!   bool res = false;
!   unsigned i, x = 0;
    basic_block *bbs;
!   gimple write = NULL;
!   gimple_stmt_iterator bsi;
!   tree nb_iter = number_of_exit_cond_executions (loop);
! 
!   if (!nb_iter || nb_iter == chrec_dont_know)
!     return false;
  
    bbs = get_loop_body_in_dom_order (loop);
  
!   for (i = 0; i < loop->num_nodes; i++)
!     {
!       basic_block bb = bbs[i];
! 
!       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
!       x++;
! 
!       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
!       {
!         gimple stmt = gsi_stmt (bsi);
! 
!         if (gimple_code (stmt) == GIMPLE_LABEL
!             || is_gimple_debug (stmt))
!           continue;
! 
!         if (!bitmap_bit_p (partition->stmts, x++))
!           continue;
! 
!         /* If the stmt has uses outside of the loop fail.
!            ???  If the stmt is generated in another partition that
!            is not created as builtin we can ignore this.  */
!         if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
!           {
!             if (dump_file && (dump_flags & TDF_DETAILS))
!               fprintf (dump_file, "not generating builtin, partition has "
!                        "scalar uses outside of the loop\n");
!             goto end;
!           }
! 
!         if (is_gimple_assign (stmt)
!             && !is_gimple_reg (gimple_assign_lhs (stmt)))
!           {
!             /* Don't generate the builtins when there are more than
!                one memory write.  */
!             if (write != NULL)
!               goto end;
! 
!             write = stmt;
!             if (bb == loop->latch)
!               nb_iter = number_of_latch_executions (loop);
!           }
!       }
!     }
! 
!   if (!stmt_with_adjacent_zero_store_dr_p (write))
!     goto end;
! 
!   /* The new statements will be placed before LOOP.  */
!   bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
!   generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
!   res = true;
! 
!   /* If this is the last partition for which we generate code, we have
!      to destroy the loop.  */
!   if (!copy_p)
!     {
!       unsigned nbbs = loop->num_nodes;
!       edge exit = single_exit (loop);
!       basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
!       redirect_edge_pred (exit, src);
!       exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
!       exit->flags |= EDGE_FALLTHRU;
!       cancel_loop_tree (loop);
!       rescan_loop_exit (exit, false, true);
! 
!       for (i = 0; i < nbbs; i++)
!       delete_basic_block (bbs[i]);
  
!       set_immediate_dominator (CDI_DOMINATORS, dest,
!                              recompute_dominator (CDI_DOMINATORS, dest));
!     }
! 
!  end:
    free (bbs);
!   return res;
  }
  
! /* Generates code for PARTITION.  For simple loops, this function can
!    generate a built-in.  */
  
! static bool
  generate_code_for_partition (struct loop *loop, partition_t partition,
                             bool copy_p)
  {
!   if (generate_builtin (loop, partition, copy_p))
!     return true;
  
!   return generate_loops_for_partition (loop, partition, copy_p);
  }
  
  
--- 375,430 ----
    free_data_ref (dr);
  }
  
! /* Remove and destroy the loop LOOP.  */
  
! static void
! destroy_loop (struct loop *loop)
  {
!   unsigned nbbs = loop->num_nodes;
!   edge exit = single_exit (loop);
!   basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
    basic_block *bbs;
!   unsigned i;
  
    bbs = get_loop_body_in_dom_order (loop);
  
!   redirect_edge_pred (exit, src);
!   exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
!   exit->flags |= EDGE_FALLTHRU;
!   cancel_loop_tree (loop);
!   rescan_loop_exit (exit, false, true);
  
!   for (i = 0; i < nbbs; i++)
!     delete_basic_block (bbs[i]);
    free (bbs);
! 
!   set_immediate_dominator (CDI_DOMINATORS, dest,
!                          recompute_dominator (CDI_DOMINATORS, dest));
  }
  
! /* Generates code for PARTITION.  */
  
! static void
  generate_code_for_partition (struct loop *loop, partition_t partition,
                             bool copy_p)
  {
!   switch (partition->kind)
!     {
!     case PKIND_MEMSET:
!       generate_memset_builtin (loop, partition);
!       /* If this is the last partition for which we generate code, we have
!        to destroy the loop.  */
!       if (!copy_p)
!       destroy_loop (loop);
!       break;
! 
!     case PKIND_NORMAL:
!       generate_loops_for_partition (loop, partition, copy_p);
!       break;
  
!     default:
!       gcc_unreachable ();
!     }
  }
  
  
*************** rdg_build_components (struct graph *rdg,
*** 828,859 ****
    BITMAP_FREE (saved_components);
  }
  
! /* Returns true when it is possible to generate a builtin pattern for
!    the PARTITION of RDG.  For the moment we detect only the memset
!    zero pattern.  */
  
! static bool
! can_generate_builtin (struct graph *rdg, partition_t partition)
  {
-   unsigned i;
    bitmap_iterator bi;
!   int nb_reads = 0;
!   int nb_writes = 0;
!   int stores_zero = 0;
  
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     if (RDG_MEM_READS_STMT (rdg, i))
!       nb_reads++;
!     else if (RDG_MEM_WRITE_STMT (rdg, i))
!       {
!       gimple stmt = RDG_STMT (rdg, i);
!       nb_writes++;
!       if (!gimple_has_volatile_ops (stmt)
!           && stmt_with_adjacent_zero_store_dr_p (stmt))
!         stores_zero++;
!       }
  
!   return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
  }
  
  /* Returns true when PARTITION1 and PARTITION2 have similar memory
--- 785,855 ----
    BITMAP_FREE (saved_components);
  }
  
! /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
!    For the moment we detect only the memset zero pattern.  */
  
! static void
! classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
  {
    bitmap_iterator bi;
!   unsigned i;
!   tree nb_iter;
! 
!   partition->kind = PKIND_NORMAL;
!   partition->main_stmt = NULL;
! 
!   /* Perform general partition disqualification for builtins.  */
!   nb_iter = number_of_exit_cond_executions (loop);
!   if (!nb_iter || nb_iter == chrec_dont_know)
!     return;
  
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     {
!       gimple stmt = RDG_STMT (rdg, i);
! 
!       if (gimple_has_volatile_ops (stmt))
!       return;
  
!       /* If the stmt has uses outside of the loop fail.
!        ???  If the stmt is generated in another partition that
!        is not created as builtin we can ignore this.  */
!       if (gimple_code (stmt) != GIMPLE_PHI
!         && stmt_has_scalar_dependences_outside_loop (loop, stmt))
!       {
!         if (dump_file && (dump_flags & TDF_DETAILS))
!           fprintf (dump_file, "not generating builtin, partition has "
!                    "scalar uses outside of the loop\n");
!         return;
!       }
!     }
! 
!   /* Detect memset.  */
!   EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
!     {
!       gimple stmt = RDG_STMT (rdg, i);
! 
!       if (gimple_code (stmt) == GIMPLE_PHI)
!       continue;
! 
!       /* Any scalar stmts are ok.  */
!       if (!gimple_vuse (stmt))
!       continue;
! 
!       /* Exactly one store.  */
!       if (gimple_assign_single_p (stmt)
!         && !is_gimple_reg (gimple_assign_lhs (stmt)))
!       {
!         if (partition->main_stmt != NULL)
!           return;
!         partition->main_stmt = stmt;
!       }
!       else
!       return;
!     }
! 
!   if (partition->main_stmt != NULL
!       && stmt_with_adjacent_zero_store_dr_p (partition->main_stmt))
!     partition->kind = PKIND_MEMSET;
  }
  
  /* Returns true when PARTITION1 and PARTITION2 have similar memory
*************** fuse_partitions_with_similar_memory_acce
*** 891,900 ****
    partition_t partition1, partition2;
  
    FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
!     if (!can_generate_builtin (rdg, partition1))
        FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
        if (p1 != p2
!           && !can_generate_builtin (rdg, partition2)
            && similar_memory_accesses (rdg, partition1, partition2))
          {
            bitmap_ior_into (partition1->stmts, partition2->stmts);
--- 887,896 ----
    partition_t partition1, partition2;
  
    FOR_EACH_VEC_ELT (partition_t, *partitions, p1, partition1)
!     if (!partition_builtin_p (partition1))
        FOR_EACH_VEC_ELT (partition_t, *partitions, p2, partition2)
        if (p1 != p2
!           && !partition_builtin_p (partition2)
            && similar_memory_accesses (rdg, partition1, partition2))
          {
            bitmap_ior_into (partition1->stmts, partition2->stmts);
*************** rdg_build_partitions (struct graph *rdg,
*** 971,978 ****
      VEC_safe_push (partition_t, heap, *partitions, partition);
    else
      partition_free (partition);
- 
-   fuse_partitions_with_similar_memory_accesses (rdg, partitions);
  }
  
  /* Dump to FILE the PARTITIONS.  */
--- 967,972 ----
*************** ldist_gen (struct loop *loop, struct gra
*** 1101,1111 ****
                        processed);
    BITMAP_FREE (processed);
  
    nbp = VEC_length (partition_t, partitions);
    if (nbp == 0
        || (nbp == 1
!         && !can_generate_builtin (rdg,
!                                   VEC_index (partition_t, partitions, 0)))
        || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
      goto ldist_done;
--- 1095,1109 ----
                        processed);
    BITMAP_FREE (processed);
  
+   FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
+     classify_partition (loop, rdg, partition);
+ 
+   fuse_partitions_with_similar_memory_accesses (rdg, &partitions);
+ 
    nbp = VEC_length (partition_t, partitions);
    if (nbp == 0
        || (nbp == 1
!         && !partition_builtin_p (VEC_index (partition_t, partitions, 0)))
        || (nbp > 1
          && partition_contains_all_rw (rdg, partitions)))
      goto ldist_done;
*************** ldist_gen (struct loop *loop, struct gra
*** 1114,1121 ****
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     if (!generate_code_for_partition (loop, partition, i < nbp - 1))
!       goto ldist_done;
  
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    mark_sym_for_renaming (gimple_vop (cfun));
--- 1112,1118 ----
      dump_rdg_partitions (dump_file, partitions);
  
    FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
!     generate_code_for_partition (loop, partition, i < nbp - 1);
  
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    mark_sym_for_renaming (gimple_vop (cfun));

Reply via email to