On Sat, Jan 05, 2019 at 09:51:31AM +0100, Richard Biener wrote:
> >+ if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE
> >(arg0)))
> >+ {
> >+ if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
> >+ || !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0)))
> >+ type = TREE_TYPE (lhs);
> >+ else
> >+ type = TREE_TYPE (arg0);
>
> This misses some comments. Presumably you want to avoid arithmetic with bool
> types (or generally non-mode precision types!?).
Just for bool because the weird semantics it has. And do as few conversions
as possible.
> >+ }
> >+ else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION
> >(TREE_TYPE (arg0)))
> >+ type = TREE_TYPE (lhs);
> >+ else
> >+ type = TREE_TYPE (arg0);
> >+ if (!TYPE_OVERFLOW_WRAPS (type))
> >+ type = unsigned_type_for (type);
>
> Would doing this unconditionally simplify things? I think "optimizing" the -
> fwrapv case isn't worth it.
I've originally meant to do it only if really needed but then got lazy.
> >+ tree m = fold_convert (type, wide_int_to_tree (TREE_TYPE (lhs),
> >min));
> >+ enum tree_code code;
> >+ if (tree_int_cst_lt (arg0, arg1))
> >+ {
> >+ code = PLUS_EXPR;
> >+ m = int_const_binop (MINUS_EXPR, fold_convert (type, arg0), m);
> >+ }
> >+ else
> >+ {
> >+ code = MINUS_EXPR;
> >+ m = int_const_binop (PLUS_EXPR, fold_convert (type, arg0), m);
> >+ }
>
> Delay wide-int to tree conversion until here.
But by doing these computations on wide-int properly it appears it isn't
that hard to do.
Essentially, after conversion to type, lhs is known to be in [min, min+1]
range and we either want to perform lhs + a or a - lhs.
If it is in signed type, we need to verify it doesn't overflow, so if
a is non-negative, make sure (min+1) + a doesn't overflow and if a
is negative, make sure min + a doesn't overflow. Similarly, for
a - lhs case, if min (and min+1) are non-negative, make sure a - (min+1)
doesn't overflow, and if min is negative (min+1 in that case could be
negative or 0), make sure a - min doesn't overflow.
Ok for trunk if it passes bootstrap/regtest?
I guess I should add more testcases that test these corner cases,
the boundary cases of overflowing/not overflowing and different precisions
of the two types.
2019-01-05 Jakub Jelinek <[email protected]>
PR tree-optimization/88676
* tree-ssa-phiopt.c (two_value_replacement): New function.
(tree_ssa_phiopt_worker): Call it.
* gcc.dg/tree-ssa/pr88676.c: New test.
* gcc.dg/pr88676.c: New test.
* gcc.dg/tree-ssa/pr15826.c: Just verify there is no goto,
allow &.
--- gcc/tree-ssa-phiopt.c.jj 2019-01-04 18:53:11.315322371 +0100
+++ gcc/tree-ssa-phiopt.c 2019-01-05 13:04:38.185082743 +0100
@@ -48,6 +48,8 @@ along with GCC; see the file COPYING3.
#include "case-cfn-macros.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
+static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
+ tree, tree);
static bool conditional_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
@@ -332,8 +334,11 @@ tree_ssa_phiopt_worker (bool do_store_el
}
/* Do the replacement of conditional if it can be done. */
- if (!early_p
- && conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
+ cfgchanged = true;
+ else if (!early_p
+ && conditional_replacement (bb, bb1, e1, e2, phi,
+ arg0, arg1))
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -572,6 +577,142 @@ factor_out_conditional_conversion (edge
return newphi;
}
+/* Optimize
+ # x_5 in range [cst1, cst2] where cst2 = cst1 + 1
+ if (x_5 op cstN) # where op is == or != and N is 1 or 2
+ goto bb3;
+ else
+ goto bb4;
+ bb3:
+ bb4:
+ # r_6 = PHI<cst3(2), cst4(3)> # where cst3 == cst4 + 1 or cst4 == cst3 + 1
+
+ to r_6 = x_5 + (min (cst3, cst4) - cst1) or
+ r_6 = (min (cst3, cst4) + cst1) - x_5 depending on op, N and which
+ of cst3 and cst4 is smaller. */
+
+static bool
+two_value_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e1, gphi *phi, tree arg0, tree arg1)
+{
+ /* Only look for adjacent integer constants. */
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (arg1))
+ || TREE_CODE (arg0) != INTEGER_CST
+ || TREE_CODE (arg1) != INTEGER_CST
+ || (tree_int_cst_lt (arg0, arg1)
+ ? wi::to_widest (arg0) + 1 != wi::to_widest (arg1)
+ : wi::to_widest (arg1) + 1 != wi::to_widest (arg1)))
+ return false;
+
+ if (!empty_block_p (middle_bb))
+ return false;
+
+ gimple *stmt = last_stmt (cond_bb);
+ tree lhs = gimple_cond_lhs (stmt);
+ tree rhs = gimple_cond_rhs (stmt);
+
+ if (TREE_CODE (lhs) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
+ || TREE_CODE (rhs) != INTEGER_CST)
+ return false;
+
+ switch (gimple_cond_code (stmt))
+ {
+ case EQ_EXPR:
+ case NE_EXPR:
+ break;
+ default:
+ return false;
+ }
+
+ wide_int min, max;
+ if (get_range_info (lhs, &min, &max) != VR_RANGE
+ || min + 1 != max
+ || (wi::to_wide (rhs) != min
+ && wi::to_wide (rhs) != max))
+ return false;
+
+ /* We need to know which is the true edge and which is the false
+ edge so that we know when to invert the condition below. */
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+ if ((gimple_cond_code (stmt) == EQ_EXPR)
+ ^ (wi::to_wide (rhs) == max)
+ ^ (e1 == false_edge))
+ std::swap (arg0, arg1);
+
+ tree type;
+ if (TYPE_PRECISION (TREE_TYPE (lhs)) == TYPE_PRECISION (TREE_TYPE (arg0)))
+ {
+ /* Avoid performing the arithmetics in bool type which has different
+ semantics, otherwise prefer unsigned types from the two with
+ the same precision. */
+ if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
+ || !TYPE_UNSIGNED (TREE_TYPE (arg0)))
+ type = TREE_TYPE (lhs);
+ else
+ type = TREE_TYPE (arg0);
+ }
+ else if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (TREE_TYPE
(arg0)))
+ type = TREE_TYPE (lhs);
+ else
+ type = TREE_TYPE (arg0);
+
+ min = wide_int::from (min, TYPE_PRECISION (type),
+ TYPE_SIGN (TREE_TYPE (lhs)));
+ wide_int a = wide_int::from (wi::to_wide (arg0), TYPE_PRECISION (type),
+ TYPE_SIGN (TREE_TYPE (arg0)));
+ enum tree_code code;
+ wi::overflow_type ovf;
+ if (tree_int_cst_lt (arg0, arg1))
+ {
+ code = PLUS_EXPR;
+ a -= min;
+ if (!TYPE_UNSIGNED (type))
+ {
+ /* lhs is known to be in range [min, min+1] and we want to add a
+ to it. Check if that operation can overflow for those 2 values
+ and if yes, force unsigned type. */
+ wi::add (min + (wi::neg_p (a) ? 0 : 1), a, SIGNED, &ovf);
+ if (ovf)
+ type = unsigned_type_for (type);
+ }
+ }
+ else
+ {
+ code = MINUS_EXPR;
+ a += min;
+ if (!TYPE_UNSIGNED (type))
+ {
+ /* lhs is known to be in range [min, min+1] and we want to subtract
+ it from a. Check if that operation can overflow for those 2
+ values and if yes, force unsigned type. */
+ wi::sub (a, min + (wi::neg_p (min) ? 0 : 1), SIGNED, &ovf);
+ if (ovf)
+ type = unsigned_type_for (type);
+ }
+ }
+
+ tree arg = wide_int_to_tree (type, a);
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
+ lhs = gimplify_build1 (&gsi, NOP_EXPR, type, lhs);
+ tree new_rhs;
+ if (code == PLUS_EXPR)
+ new_rhs = gimplify_build2 (&gsi, PLUS_EXPR, type, lhs, arg);
+ else
+ new_rhs = gimplify_build2 (&gsi, MINUS_EXPR, type, arg, lhs);
+ if (!useless_type_conversion_p (TREE_TYPE (arg0), type))
+ new_rhs = gimplify_build1 (&gsi, NOP_EXPR, TREE_TYPE (arg0), new_rhs);
+
+ replace_phi_edge_with_variable (cond_bb, e1, phi, new_rhs);
+
+ /* Note that we optimized this PHI. */
+ return true;
+}
+
/* The function conditional_replacement does the main work of doing the
conditional replacement. Return true if the replacement is done.
Otherwise return false.
--- gcc/testsuite/gcc.dg/tree-ssa/pr88676.c.jj 2019-01-05 12:25:32.463958539
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr88676.c 2019-01-05 12:25:32.463958539
+0100
@@ -0,0 +1,133 @@
+/* PR tree-optimization/88676 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not " = PHI <" "optimized" } } */
+
+void bar (int, int, int);
+
+__attribute__((noipa)) int
+f1 (unsigned x)
+{
+ int r;
+ if (x >= 2)
+ __builtin_unreachable ();
+ switch (x)
+ {
+ case 0:
+ r = 1;
+ break;
+ case 1:
+ r = 2;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+}
+
+__attribute__((noipa)) void
+f2 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x != 98)
+ y = 115;
+ else
+ y = 116;
+ bar (x, y, 116);
+}
+
+__attribute__((noipa)) void
+f3 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x == 98)
+ y = 115;
+ else
+ y = 116;
+ bar (x, y, 115);
+}
+
+__attribute__((noipa)) void
+f4 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x != 99)
+ y = 115;
+ else
+ y = 116;
+ bar (x, y, 115);
+}
+
+__attribute__((noipa)) void
+f5 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x == 99)
+ y = 115;
+ else
+ y = 116;
+ bar (x, y, 116);
+}
+
+__attribute__((noipa)) void
+f6 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x != 98)
+ y = 116;
+ else
+ y = 115;
+ bar (x, y, 115);
+}
+
+__attribute__((noipa)) void
+f7 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x == 98)
+ y = 116;
+ else
+ y = 115;
+ bar (x, y, 116);
+}
+
+__attribute__((noipa)) void
+f8 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x != 99)
+ y = 116;
+ else
+ y = 115;
+ bar (x, y, 116);
+}
+
+__attribute__((noipa)) void
+f9 (int x)
+{
+ int y;
+ x = (x & 1) + 98;
+ if (x == 99)
+ y = 116;
+ else
+ y = 115;
+ bar (x, y, 115);
+}
+
+__attribute__((noipa)) int
+f10 (int x)
+{
+ x = (x & 1) + 36;
+ if (x == 36)
+ return 85;
+ else
+ return 84;
+}
--- gcc/testsuite/gcc.dg/pr88676.c.jj 2019-01-05 12:25:32.463958539 +0100
+++ gcc/testsuite/gcc.dg/pr88676.c 2019-01-05 12:25:32.463958539 +0100
@@ -0,0 +1,48 @@
+/* PR tree-optimization/88676 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "tree-ssa/pr88676.c"
+
+__attribute__((noipa)) void
+bar (int x, int y, int z)
+{
+ if (z != 115 && z != 116)
+ __builtin_abort ();
+ if (x == 98)
+ {
+ if (y != z)
+ __builtin_abort ();
+ }
+ else if (x != 99)
+ __builtin_abort ();
+ else if (z == 115)
+ {
+ if (y != 116)
+ __builtin_abort ();
+ }
+ else if (y != 115)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ if (f1 (0) != 1 || f1 (1) != 2)
+ __builtin_abort ();
+ int i;
+ for (i = -12; i < 12; i++)
+ {
+ f2 (i);
+ f3 (i);
+ f4 (i);
+ f5 (i);
+ f6 (i);
+ f7 (i);
+ f8 (i);
+ f9 (i);
+ if (f10 (i) != ((i & 1) ? 84 : 85))
+ __builtin_abort ();
+ }
+ return 0;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/pr15826.c.jj 2016-02-27 07:41:54.377920386
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr15826.c 2019-01-05 12:25:32.463958539
+0100
@@ -33,4 +33,4 @@ andrew (struct s *p)
return i;
}
-/* { dg-final { scan-tree-dump-times " & | goto " 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "goto " "optimized" } } */
Jakub