Hi,
Take subroutine "DACOP" from spec2k/200.sixtrack as an example: the loop needs 
to be versioned for vectorization because of possible aliasing.  The alias checks 
for the data-reference pairs are like:

//pair 1
dr_a:
(Data Ref: 
  bb: 8 
  stmt: _92 = da.cc[_27];
  ref: da.cc[_27];
)
dr_b:
(Data Ref: 
  bb: 8 
  stmt: da.cc[_93] = _92;
  ref: da.cc[_93];
)
//pair 2
dr_a:
(Data Ref: 
  bb: 8 
  stmt: pretmp_29 = da.i2[_27];
  ref: da.i2[_27];
)
dr_b:
(Data Ref: 
  bb: 8 
  stmt: da.i2[_93] = pretmp_29;
  ref: da.i2[_93];
)
//pair 3
dr_a:
(Data Ref: 
  bb: 8 
  stmt: pretmp_28 = da.i1[_27];
  ref: da.i1[_27];
)
dr_b:
(Data Ref: 
  bb: 8 
  stmt: da.i1[_93] = pretmp_28;
  ref: da.i1[_93];
)

The code generated for alias checks are as below:

  <bb 23>:
  # iy_186 = PHI <_413(22), 2(2)>
  # ivtmp_1050 = PHI <ivtmp_1049(22), 512(2)>
  _155 = iy_186 + -2;
  _156 = _155 * 516;
  _241 = iy_186 + -1;
  _242 = _241 * 516;
  _328 = iy_186 * 516;
  _413 = iy_186 + 1;
  _414 = _413 * 516;
  _499 = iy_186 + 2;
  _500 = _499 * 516;
  _998 = iy_186 * 516;
  _997 = (sizetype) _998;
  _996 = _997 + 6;
  _995 = _996 * 4;
  _994 = global_Output.2_16 + _995;
  _993 = iy_186 * 516;
  _992 = (long unsigned int) _993;
  _991 = _992 * 4;
  _990 = _991 + 18446744073709547488;
  _989 = global_Input.0_153 + _990;
  _886 = _989 >= _994;
  _885 = iy_186 * 516;
  _884 = (sizetype) _885;
  _883 = _884 + 1040;
  _882 = _883 * 4;
  _881 = global_Input.0_153 + _882;
  _880 = (sizetype) _998;
  _879 = _880 + 2;
  _878 = _879 * 4;
  _877 = global_Output.2_16 + _878;
  _876 = _877 >= _881;
  _875 = _876 | _886;
  _874 = iy_186 * 516;
  _873 = (sizetype) _874;
  _872 = _873 + 514;
  _871 = _872 * 4;
  _870 = global_Output.2_16 + _871;
  _869 = local_Filter_33 >= _870;
  _868 = local_Filter_33 + 100;
  _867 = (sizetype) _874;
  _866 = _867 + 2;
  _865 = _866 * 4;
  _864 = global_Output.2_16 + _865;
  _863 = _864 >= _868;
  _862 = _863 | _869;
  _861 = _862 & _875;
  if (_861 != 0)
    goto <bb 7>;
  else
    goto <bb 4>;

It contains quite a lot of redundant computations.  The root cause is that the 
vectorizer simply translates alias checks into comparisons of full address 
expressions, and the CSE opportunities are hidden from the folder.  This patch 
improves function vect_create_cond_for_alias_checks by simplifying the 
comparison: it compares DR_BASE_ADDRESS/DR_INIT of both data-references at 
compilation time.  It also simplifies conditions of the form:
  (addr_a_min + addr_a_length) <= addr_b_min || (addr_b_min + addr_b_length) <= 
addr_a_min
into below form:
  cond_expr = addr_b_min - addr_a_min
  cond_expr >= addr_a_length || cond_expr <= -addr_b_length
if the comparison is done in a signed type.  And this can be further simplified 
by the folder if addr_a_length and addr_b_length are equal/constant, which is 
quite common.
I looked into the generated assembly; this patch does introduce small 
regressions in some cases, but overall I think it's good.  Bootstrapped and 
tested on x86_64 and AArch64.  Is it OK?
Thanks,
bin

2016-06-08  Bin Cheng  <bin.ch...@arm.com>

        * tree-vect-loop-manip.c (vect_create_cond_for_alias_checks): New
        parameter.  Simplify alias check conditions at compilation time.
        (vect_loop_versioning): Pass new argument to above function.
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 438458e..b38a6e4 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2211,17 +2211,22 @@ vect_create_cond_for_align_checks (loop_vec_info 
loop_vinfo,
 
    Output:
    COND_EXPR - conditional expression.
+   COND_EXPR_STMT_LIST - statements needed to construct the conditional
+                         expression.
 
    The returned COND_EXPR is the conditional expression to be used in the if
    statement that controls which version of the loop gets executed at runtime.
 */
 
 void
-vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
+vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
+                                  tree * cond_expr,
+                                  gimple_seq *cond_expr_stmt_list)
 {
   vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
     LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
   tree part_cond_expr;
+  gimple_seq seq;
 
   /* Create expression
      ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
@@ -2237,21 +2242,17 @@ vect_create_cond_for_alias_checks (loop_vec_info 
loop_vinfo, tree * cond_expr)
 
   for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
     {
+      enum tree_code code;
+      tree type_a, type_b, type_offset = ssizetype;
       const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
       const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
       tree segment_length_a = dr_a.seg_len;
       tree segment_length_b = dr_b.seg_len;
       tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
       tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
+      tree init_a = DR_INIT (dr_a.dr), init_b = DR_INIT (dr_b.dr);
       tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
 
-      offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
-                             offset_a, DR_INIT (dr_a.dr));
-      offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
-                             offset_b, DR_INIT (dr_b.dr));
-      addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
-      addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
-
       if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
@@ -2262,31 +2263,140 @@ vect_create_cond_for_alias_checks (loop_vec_info 
loop_vinfo, tree * cond_expr)
          dump_printf (MSG_NOTE, "\n");
        }
 
-      tree seg_a_min = addr_base_a;
-      tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
       /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
         bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
-        [a, a+12) */
+        [a, a+12).  */
       if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
        {
          tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
-         seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
-         seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
+         init_a = fold_build2 (PLUS_EXPR, type_offset, init_a,
+                               fold_convert (type_offset, unit_size));
        }
-
-      tree seg_b_min = addr_base_b;
-      tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
       if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
        {
          tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
-         seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
-         seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
+         init_b = fold_build2 (PLUS_EXPR, type_offset, init_b,
+                               fold_convert (type_offset, unit_size));
        }
 
-      part_cond_expr =
-       fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
-         fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
-         fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+      /* No need to consider init parts if they are equal in both DRs.  */
+      if (wi::to_widest (init_a) != wi::to_widest (init_b))
+       {
+         if (wi::to_widest (init_a) < wi::to_widest (init_b))
+           {
+             init_b = fold_build2 (MINUS_EXPR, type_offset, init_b, init_a);
+             init_a = fold_convert (type_offset, integer_zero_node);
+           }
+         else
+           {
+             init_a = fold_build2 (MINUS_EXPR, type_offset, init_a, init_b);
+             init_b = fold_convert (type_offset, integer_zero_node);
+           }
+
+         offset_a = fold_build2 (PLUS_EXPR, type_offset, offset_a, init_a);
+         offset_b = fold_build2 (PLUS_EXPR, type_offset, offset_b, init_b);
+       }
+
+      /* No need to consider base parts if they are equal in both DRs.  */
+      if (operand_equal_p (addr_base_a, addr_base_b, 0))
+       {
+         code = PLUS_EXPR;
+         type_a = type_offset;
+         type_b = type_offset;
+         addr_base_a = offset_a;
+         addr_base_b = offset_b;
+       }
+      else
+       {
+         type_offset = sizetype;
+         code = POINTER_PLUS_EXPR;
+         type_a = TREE_TYPE (addr_base_a);
+         type_b = TREE_TYPE (addr_base_b);
+         addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
+         addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
+       }
+
+      segment_length_a = fold_convert (type_offset, segment_length_a);
+      segment_length_b = fold_convert (type_offset, segment_length_b);
+
+      addr_base_a = force_gimple_operand (addr_base_a, &seq, true, NULL);
+      gimple_seq_add_seq (cond_expr_stmt_list, seq);
+      addr_base_b = force_gimple_operand (addr_base_b, &seq, true, NULL);
+      gimple_seq_add_seq (cond_expr_stmt_list, seq);
+
+      tree seg_a_min;
+      tree seg_a_max;
+      if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
+       {
+         seg_a_min = fold_build2 (code, type_a,
+                                  addr_base_a, segment_length_a);
+         seg_a_max = addr_base_a;
+         /* We need absolute value of segment length later.  */
+         segment_length_a = fold_build2 (MINUS_EXPR, type_offset,
+                                         integer_zero_node, segment_length_a);
+       }
+      else
+       {
+         seg_a_min = addr_base_a;
+         seg_a_max = fold_build2 (code, type_a,
+                                   addr_base_a, segment_length_a);
+       }
+
+      tree seg_b_min;
+      tree seg_b_max;
+      if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
+       {
+         seg_b_min = fold_build2 (code, type_b,
+                                  addr_base_b, segment_length_b);
+         seg_b_max = addr_base_b;
+         /* We need absolute value of segment length later.  */
+         segment_length_b = fold_build2 (MINUS_EXPR, type_offset,
+                                         integer_zero_node, segment_length_b);
+       }
+      else
+       {
+         seg_b_min = addr_base_b;
+         seg_b_max = fold_build2 (code, type_b,
+                                  addr_base_b, segment_length_b);
+       }
+
+      if (!TYPE_UNSIGNED (type_offset))
+       {
+         /* If comparison is done in signed type, below condition checks:
+
+              (seg_a_max <= seg_b_min || seg_b_max <= seg_a_min)
+
+            can be simplified into:
+
+              cond_expr = seg_b_min - seg_a_min;
+              (cond_expr >= segment_length_a
+               || cond_expr <= -segment_length_b)
+
+            It can be further simplified by folder if
+
+              (segment_length_a == segment_length_b)
+
+            holds, which is common.  */
+         tree diff = fold_build2 (MINUS_EXPR, type_offset,
+                                  seg_b_min, seg_a_min);
+         diff = force_gimple_operand (diff, &seq, true, NULL);
+         gimple_seq_add_seq (cond_expr_stmt_list, seq);
+         segment_length_b = fold_build2 (MINUS_EXPR, type_offset,
+                                         integer_zero_node, segment_length_b);
+         tree expr1 = fold_build2 (GE_EXPR, boolean_type_node,
+                                   diff, segment_length_a);
+         tree expr2 = fold_build2 (LE_EXPR, boolean_type_node,
+                                   diff, segment_length_b);
+         part_cond_expr = fold_build2 (TRUTH_OR_EXPR,
+                                       boolean_type_node, expr1, expr2);
+       }
+      else
+       {
+         part_cond_expr =
+           fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+             fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
+             fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
+       }
 
       if (*cond_expr)
        *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
@@ -2356,7 +2466,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
                                       &cond_expr_stmt_list);
 
   if (version_alias)
-    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
+    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
+                                      &cond_expr_stmt_list);
 
   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);

Reply via email to