minus

Bin Cheng Tue, 11 Oct 2016 08:04:28 -0700

Hi,
Given below test case,
int foo (unsigned short a[], unsigned int x)
{
  unsigned int i;
  for (i = 0; i < 1000; i++)
    {
      x = a[i];
      a[i] = (unsigned short)(x >= 32768 ? x - 32768 : 0);
    }
  return x;
}


it now can be vectorized on AArch64, but generated assembly is way from optimal:
.L4:
        ldr     q4, [x3, x1]
        add     w2, w2, 1
        cmp     w2, w0
        ushll   v1.4s, v4.4h, 0
        ushll2  v0.4s, v4.8h, 0
        add     v3.4s, v1.4s, v6.4s
        add     v2.4s, v0.4s, v6.4s
        cmhi    v1.4s, v1.4s, v5.4s
        cmhi    v0.4s, v0.4s, v5.4s
        and     v1.16b, v3.16b, v1.16b
        and     v0.16b, v2.16b, v0.16b
        xtn     v2.4h, v1.4s
        xtn2    v2.8h, v0.4s
        str     q2, [x3, x1]
        add     x1, x1, 16
        bcc     .L4

The vectorized loop has 15 instructions, which can be greatly simplified by 
turning cond_expr into max_expr, as below:
.L4:
        ldr     q1, [x3, x1]
        add     w2, w2, 1
        cmp     w2, w0
        umax    v0.8h, v1.8h, v2.8h
        add     v0.8h, v0.8h, v2.8h
        str     q0, [x3, x1]
        add     x1, x1, 16
        bcc     .L4

This patch addresses the issue by adding new vectorization pattern.
Bootstrap and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin

2016-10-11  Bin Cheng  <bin.ch...@arm.com>

        * tree-vect-patterns.c (vect_recog_min_max_modify_pattern): New.
        (vect_vect_recog_func_ptrs): New element for above pattern.
        * tree-vectorizer.h (NUM_PATTERNS): Increase by 1.

gcc/testsuite/ChangeLog
2016-10-11  Bin Cheng  <bin.ch...@arm.com>

        * gcc.dg/vect/vect-umax-modify-pattern.c: New test.
        * gcc.dg/vect/vect-umin-modify-pattern.c: New test.

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 7e6e45d..b502498 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -65,6 +65,8 @@ static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
 static gimple *vect_recog_mult_pattern (vec<gimple *> *,
                                       tree *, tree *);
 
+static gimple *vect_recog_min_max_modify_pattern (vec<gimple *> *,
+                                                 tree *, tree *);
 static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
                                                  tree *, tree *);
 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
@@ -91,6 +93,7 @@ static vect_recog_func 
vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
       { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
       {        vect_recog_divmod_pattern, "divmod" },
       {        vect_recog_mult_pattern, "mult" },
+      {        vect_recog_min_max_modify_pattern, "min_max_modify" },
       {        vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
       {        vect_recog_bool_pattern, "bool" },
       {        vect_recog_mask_conversion_pattern, "mask_conversion" }
@@ -2964,6 +2967,262 @@ vect_recog_divmod_pattern (vec<gimple *> *stmts,
   return pattern_stmt;
 }
 
+/* Function vect_recog_min_max_modify_pattern
+
+   Try to find the following pattern:
+
+     type x_t, iftmp1_t, iftmp2_t;
+     TYPE a_T;
+   loop:
+     S1  a_T = (TYPE)x_t;
+     S2  iftmp1_t = x_t - CONST1;
+     S3  iftmp2_t = (a_T > (TYPE)CONST2) ? iftmp1_t : (CONST2 - CONST1);
+
+   Or
+
+   loop:
+     S1  a_T = (TYPE)x_t;
+     S2  iftmp1_t = x_t + CONST1;
+     S3  iftmp2_t = (a_T < (TYPE)CONST2) ? iftmp1_t : (CONST2 + CONST1);
+
+   where both type 'TYPE' and 'type' are unsigned integral types and
+   TYPE is larger than type in prevision.  All constant operands need
+   to fit into 'type'.
+
+   Input:
+
+   * LAST_STMT: A stmt from which the pattern search begins.
+
+   Output:
+
+   * TYPE_IN: The type of the input arguments to the pattern.
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the pattern,
+     as well as an additional def_stmt is generated.
+
+       patt_5 = MAX_EXPR <x_t, CONST2>;
+       iftmp2_t = patt_5 - CONST1;
+
+     Or
+
+       patt_5 = MIN_EXPR <x_t, CONST2>;
+       iftmp2_t = patt_5 + CONST1;  */
+
+static gimple *
+vect_recog_min_max_modify_pattern (vec<gimple *> *stmts, tree *type_in,
+                                  tree *type_out)
+{
+  gimple *last_stmt = (*stmts)[0];
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
+  gimple *pattern_stmt, *def_stmt;
+  vec_info *vinfo = stmt_vinfo->vinfo;
+  enum tree_code code, p_code;
+  tree op0 = NULL_TREE, op1, p_op1, orig_type;
+  bool promotion;
+
+  if (!is_gimple_assign (last_stmt)
+      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
+      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
+    return NULL;
+
+  tree cond_expr = gimple_assign_rhs1 (last_stmt);
+  if (!COMPARISON_CLASS_P (cond_expr))
+    return NULL;
+
+  enum tree_code cond_code = TREE_CODE (cond_expr);
+  if (cond_code != LE_EXPR && cond_code != LT_EXPR
+      && cond_code != GE_EXPR && cond_code != GT_EXPR)
+    return NULL;
+
+  tree then_clause = gimple_assign_rhs2 (last_stmt);
+  tree else_clause = gimple_assign_rhs3 (last_stmt);
+  tree type = TREE_TYPE (then_clause);
+  if (!INTEGRAL_TYPE_P (type) || !TYPE_OVERFLOW_WRAPS (type))
+    return NULL;
+
+  tree vectype = get_vectype_for_scalar_type (type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* Make sure the constant operand appears in else clause.  */
+  if (TREE_CODE (then_clause) == INTEGER_CST)
+    {
+      cond_code = invert_tree_comparison (cond_code, false);
+      std::swap (then_clause, else_clause);
+    }
+  if (TREE_CODE (then_clause) != SSA_NAME
+      || TREE_CODE (else_clause) != INTEGER_CST)
+    return NULL;
+
+  tree cond_op0 = TREE_OPERAND (cond_expr, 0);
+  tree cond_op1 = TREE_OPERAND (cond_expr, 1);
+  tree cond_type = TREE_TYPE (cond_op0);
+  if (!INTEGRAL_TYPE_P (cond_type) || TREE_CODE (cond_op1) != INTEGER_CST)
+    return NULL;
+
+  /* Handle simple boundary cases.  Below code snippet:
+
+       type x_t;
+       signed type a_T;
+
+       a_T = (signed type)x_t;
+       r = (a_t OP 0) ? then_clause : else_clause;
+
+     in which OP is either "<" or ">=".  It equals to:
+
+       type x_t;
+       signed type a_T;
+
+       a_T = (signed type)x_t;
+       r = (x_t OP TYPE_MAX_VALUE (type)) ? then_clause : else_clause;
+
+     in which OP is ">" or "<=".  */
+  if (tree_nop_conversion_p (type, cond_type)
+      && TYPE_UNSIGNED (type) && !TYPE_UNSIGNED (cond_type)
+      && TREE_CODE (cond_op0) == SSA_NAME && integer_zerop (cond_op1)
+      && type_conversion_p (cond_op0, last_stmt, false,
+                           &orig_type, &def_stmt, &promotion)
+      && orig_type && types_compatible_p (type, orig_type))
+    {
+      if (cond_code == LT_EXPR)
+       cond_code = GT_EXPR;
+      else if (cond_code == GE_EXPR)
+       cond_code = LE_EXPR;
+      else
+       return NULL;
+
+      cond_op0 = gimple_assign_rhs1 (def_stmt);
+      cond_op1 = TYPE_MAX_VALUE (cond_type);
+      cond_type = orig_type;
+    }
+
+  /* Check if MAX_EXPR/MIN_EXPR is supported.  Do it here so we can early
+     out if operator is not supported.  */
+  if (cond_code == GE_EXPR || cond_code == GT_EXPR)
+    code = MAX_EXPR;
+  else if (cond_code == LE_EXPR || cond_code == LT_EXPR)
+    code = MIN_EXPR;
+  else
+    return NULL;
+  optab optab = optab_for_tree_code (code, vectype, optab_default);
+  if (!optab || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    return NULL;
+
+  /* Check if cond_op0 is converted from type.  */
+  if (TYPE_PRECISION (cond_type) > TYPE_PRECISION (type))
+    {
+      if (!int_fits_type_p (cond_op1, type))
+       return NULL;
+      else
+       cond_op1 = fold_convert (type, cond_op1);
+
+      orig_type = NULL_TREE;
+      /* If the conversion is folded in cond_expr.  */
+      if (CONVERT_EXPR_CODE_P (TREE_CODE (cond_op0)))
+       {
+         cond_op0 = TREE_OPERAND (cond_op0, 0);
+         orig_type = TREE_TYPE (cond_op0);
+       }
+      /* Or not.  */
+      else if (type_conversion_p (cond_op0, last_stmt, false,
+                                 &orig_type, &def_stmt, &promotion))
+       {
+         cond_op0 = gimple_assign_rhs1 (def_stmt);
+       }
+
+      if (!orig_type || !types_compatible_p (type, orig_type))
+       return NULL;
+    }
+  /* Only unsigned type supported so far.  */
+  if (!TYPE_UNSIGNED (type) || !TYPE_UNSIGNED (cond_type))
+    return NULL;
+
+  def_stmt = SSA_NAME_DEF_STMT (then_clause);
+  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
+    return NULL;
+  enum tree_code then_code = gimple_assign_rhs_code (def_stmt);
+  if (then_code != PLUS_EXPR && then_code != MINUS_EXPR)
+    return NULL;
+
+  tree then_op0 = gimple_assign_rhs1 (def_stmt);
+  tree then_op1 = gimple_assign_rhs2 (def_stmt);
+  if (TREE_CODE (then_op1) != INTEGER_CST
+      || !operand_equal_p (cond_op0, then_op0, 0))
+    return NULL;
+
+  /* MAX_EXPR case.  */
+  if (cond_code == GE_EXPR || cond_code == GT_EXPR)
+    {
+      if (then_code == PLUS_EXPR)
+       then_op1 = fold_build1 (NEGATE_EXPR, type, then_op1);
+
+      op1 = fold_build2 (PLUS_EXPR, type, else_clause, then_op1);
+      if (wi::to_widest (cond_op1) != wi::to_widest (op1)
+         /* Boundary cases need more care, for example:
+
+              cond_op0 > (CONST2 - 1) ? then_clause : CONST2 - CONST1
+
+            equals to:
+
+              cond_op0 >= (CONST2) ? then_clause : CONST2 - CONST1
+
+            In these cases, cond_op0 and cond_op1 have the same value,
+            as well as then_clause and else_clause.  It doesn't matter
+            which value is taken.  */
+         && !(cond_code == GT_EXPR
+              && (wi::to_widest (cond_op1) + 1) == wi::to_widest (op1))
+         && !(cond_code == GE_EXPR
+              && (wi::to_widest (cond_op1) - 1) == wi::to_widest (op1)))
+       return NULL;
+
+      op0 = then_op0;
+      code = MAX_EXPR;
+      p_code = MINUS_EXPR;
+      p_op1 = then_op1;
+    }
+  /* MIN_EXPR case.  */
+  if (cond_code == LE_EXPR || cond_code == LT_EXPR)
+    {
+      if (then_code == MINUS_EXPR)
+       then_op1 = fold_build1 (NEGATE_EXPR, type, then_op1);
+
+      op1 = fold_build2 (MINUS_EXPR, type, else_clause, then_op1);
+      if (wi::to_widest (cond_op1) != wi::to_widest (op1)
+         && !(cond_code == LT_EXPR
+              && (wi::to_widest (cond_op1) - 1) == wi::to_widest (op1))
+         && !(cond_code == LE_EXPR
+              && (wi::to_widest (cond_op1) + 1) == wi::to_widest (op1)))
+       return NULL;
+
+      op0 = then_op0;
+      code = MIN_EXPR;
+      p_code = PLUS_EXPR;
+      p_op1 = then_op1;
+    }
+  gcc_assert (op0 != NULL_TREE);
+
+  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
+                                 code, op0, op1);
+  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
+                                     p_code, gimple_assign_lhs (def_stmt),
+                                     p_op1);
+
+  new_pattern_def_seq (stmt_vinfo, def_stmt);
+  def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
+  set_vinfo_for_stmt (def_stmt, def_stmt_info);
+  STMT_VINFO_VECTYPE (def_stmt_info) = vectype;
+  *type_in = vectype;
+  *type_out = vectype;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "vect_recog_min_max_modify_pattern: detected:\n");
+
+  return pattern_stmt;
+}
+
 /* Function vect_recog_mixed_size_cond_pattern
 
    Try to find the following pattern:
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 29ef676..e8f93ea 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1180,7 +1180,7 @@ extern bool is_simple_and_all_uses_invariant (gimple *, 
loop_vec_info);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *);
-#define NUM_PATTERNS 14
+#define NUM_PATTERNS 15
 void vect_pattern_recog (vec_info *);
 
 /* In tree-vectorizer.c.  */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-umax-modify-pattern.c 
b/gcc/testsuite/gcc.dg/vect/vect-umax-modify-pattern.c
new file mode 100644
index 0000000..87ac36b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-umax-modify-pattern.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 2 ? x - 32768 : 32770);
+    }
+  return x;
+}
+/* { dg-final { scan-tree-dump-times "MAX_EXPR " 4 "optimized" { xfail 
vect_no_int_min_max } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-umin-modify-pattern.c 
b/gcc/testsuite/gcc.dg/vect/vect-umin-modify-pattern.c
new file mode 100644
index 0000000..85f8bbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-umin-modify-pattern.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 2 ? x + 999 : 1001);
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "MIN_EXPR " 4 "optimized" { xfail 
vect_no_int_min_max } } } */

[PATCH GCC]New vectorization pattern turning cond_expr into max/min and plus/minus

Reply via email to