This allows the backend to generate movcc instructions if the target machine has a movcc pattern.
branchless-cond.c needs to be updated since some target machines have conditional move instructions, and the expression will not be changed to a branchless expression. gcc/ChangeLog: PR target/113095 * match.pd (`(zero_one == 0) ? y : z <op> y`, `(zero_one != 0) ? z <op> y : y`): Do not match to branchless expression, if target machine has conditional move pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/branchless-cond.c: Update testcase. --- gcc/match.pd | 30 +++++++++++++++++-- .../gcc.dg/tree-ssa/branchless-cond.c | 6 ++-- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index e42ecaf9ec7..a1f90b1cd41 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4231,7 +4231,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) > 1 && (INTEGRAL_TYPE_P (TREE_TYPE (@0)))) - (op (mult (convert:type @0) @2) @1)))) + (with { + bool can_movecc_p = false; + if (can_conditionally_move_p (TYPE_MODE (type))) + can_movecc_p = true; + + /* Some target only support word_mode for movcc pattern, if type can + extend to word_mode then use conditional move. Even if there is a + extend instruction, the cost is lower than branchless. */ + if (can_extend_p (word_mode, TYPE_MODE (type), TYPE_UNSIGNED (type)) + && can_conditionally_move_p (word_mode)) + can_movecc_p = true; + } + (if (!can_movecc_p) + (op (mult (convert:type @0) @2) @1)))))) /* (zero_one != 0) ? z <op> y : y -> ((typeof(y))zero_one * z) <op> y */ (for op (bit_xor bit_ior plus) @@ -4243,7 +4256,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) > 1 && (INTEGRAL_TYPE_P (TREE_TYPE (@0)))) - (op (mult (convert:type @0) @2) @1)))) + (with { + bool can_movecc_p = false; + if (can_conditionally_move_p (TYPE_MODE (type))) + can_movecc_p = true; + + /* Some target only support word_mode for movcc pattern, if type can + extend to word_mode then use conditional move. 
Even if there is a + extend instruction, the cost is lower than branchless. */ + if (can_extend_p (word_mode, TYPE_MODE (type), TYPE_UNSIGNED (type)) + && can_conditionally_move_p (word_mode)) + can_movecc_p = true; + } + (if (!can_movecc_p) + (op (mult (convert:type @0) @2) @1)))))) /* ?: Value replacement. */ /* a == 0 ? b : b + a -> b + a */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c index e063dc4bb5f..c002ed97364 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/branchless-cond.c @@ -21,6 +21,6 @@ int f4(unsigned int x, unsigned int y, unsigned int z) return ((x & 1) != 0) ? z | y : y; } -/* { dg-final { scan-tree-dump-times " \\\*" 4 "optimized" } } */ -/* { dg-final { scan-tree-dump-times " & " 4 "optimized" } } */ -/* { dg-final { scan-tree-dump-not "if " "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \\\*" 4 "optimized" { xfail { "aarch64*-*-* alpha*-*-* bfin*-*-* epiphany-*-* i?86-*-* x86_64-*-* nds32*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times " & " 4 "optimized" { xfail { "aarch64*-*-* alpha*-*-* bfin*-*-* epiphany-*-* i?86-*-* x86_64-*-* nds32*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-not "if " "optimized" { xfail { "aarch64*-*-* alpha*-*-* bfin*-*-* epiphany-*-* i?86-*-* x86_64-*-* nds32*-*-*" } } } } */ -- 2.40.1