commit adedd5c173388ae505470df152b9cb3947339566 Author: Jakub Jelinek <ja...@redhat.com> Date: Tue May 3 13:37:25 2016 +0200
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc') optimized bit test on atomic builtin return with lock bts/btr/btc. But it works only for unsigned integers since atomic builtins operate on the 'uintptr_t' type. It fails on bool: _1 = atomic builtin; _4 = (_Bool) _1; and signed integers: _1 = atomic builtin; _2 = (int) _1; _5 = _2 & (1 << N); Improve bit test on atomic builtin return by converting: _1 = atomic builtin; _4 = (_Bool) _1; to _1 = atomic builtin; _5 = _1 & (1 << 0); _4 = (_Bool) _5; and converting: _1 = atomic builtin; _2 = (int) _1; _5 = _2 & (1 << N); to _1 = atomic builtin; _6 = _1 & (1 << N); _5 = (int) _6; gcc/ PR middle-end/102566 * tree-ssa-ccp.c (optimize_atomic_bit_test_and): Handle cast between atomic builtin and bit test. gcc/testsuite/ PR middle-end/102566 * g++.target/i386/pr102566-1.C: New test. * gcc.target/i386/pr102566-1a.c: Likewise. * gcc.target/i386/pr102566-1b.c: Likewise. * gcc.target/i386/pr102566-2.c: Likewise. --- gcc/testsuite/g++.target/i386/pr102566-1.C | 12 ++ gcc/testsuite/gcc.target/i386/pr102566-1a.c | 188 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr102566-1b.c | 107 +++++++++++ gcc/testsuite/gcc.target/i386/pr102566-2.c | 14 ++ gcc/tree-ssa-ccp.c | 136 +++++++++++++- 5 files changed, 452 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C new file mode 100644 index 00000000000..6e33298d8bf --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C @@ -0,0 +1,12 @@ +/* { dg-do compile { target c++11 } } */ +/* { dg-options "-O2" } */ + +#include <atomic> + +bool tbit(std::atomic<int> &i) +{ + return i.fetch_or(1, std::memory_order_relaxed) & 1; +} + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c new file mode 100644 index 00000000000..a915de354e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c @@ -0,0 +1,188 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +void bar (void); + +__attribute__((noinline, noclone)) int +f1 (int *a, int bit) +{ + int mask = 1 << bit; + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f2 (int *a, int bit) +{ + int mask = 1 << bit; + int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED); + int t2 = t1 & mask; + return t2 != 0; +} + +__attribute__((noinline, noclone)) long int +f3 (long int *a, int bit) +{ + long int mask = 1l << bit; + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; +} + +__attribute__((noinline, noclone)) int +f4 (int *a) +{ + int mask = 1 << 7; + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f5 (int *a) +{ + int mask = 1 << 13; + return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f6 (int *a) +{ + int mask = 1 << 0; + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) void +f7 (int *a, int bit) +{ + int mask = 1 << bit; + if ((__sync_fetch_and_xor (a, mask) & mask) != 0) + bar (); +} + +__attribute__((noinline, noclone)) void +f8 (int *a, int bit) +{ + int mask = 1 << bit; + if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0) + bar (); +} + +__attribute__((noinline, noclone)) int +f9 (int *a, int bit) +{ + int mask = 1 << bit; + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f10 (int *a) +{ + int mask = 1 << 7; + return (__sync_fetch_and_xor (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f11 (int *a) +{ + int mask = 1 << 13; + return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f12 (int *a) +{ + int mask = 1 << 0; + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f13 (int *a, int bit) +{ + int mask = 1 << bit; + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f14 (int *a, int bit) +{ + int mask = 1 << bit; + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f15 (int *a, int bit) +{ + int mask = 1 << bit; + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f16 (int *a) +{ + int mask = 1 << 7; + return (__sync_fetch_and_and (a, ~mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f17 (int *a) +{ + int mask = 1 << 13; + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0; +} + +__attribute__((noinline, noclone)) int +f18 (int *a) +{ + int mask = 1 << 0; + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) long int +f19 (long int *a, int bit) +{ + long int mask = 1l << bit; + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +__attribute__((noinline, noclone)) long int +f20 (long int *a) +{ + long int mask = 1l << 7; + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0; +} + +__attribute__((noinline, noclone)) int +f21 (int *a, int bit) +{ + int mask = 1 << bit; + return (__sync_fetch_and_or (a, mask) & mask); +} + +__attribute__((noinline, noclone)) long int +f22 (long int *a) +{ + long int mask = 1l << 7; + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask); +} + +__attribute__((noinline, noclone)) long int +f23 (long int *a) +{ + long int mask = 1l << 7; + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask); +} + +__attribute__((noinline, noclone)) short int +f24 (short int *a) +{ + short int mask = 1 << 7; + return (__sync_fetch_and_or (a, mask) & mask) != 0; +} + +__attribute__((noinline, noclone)) short int +f25 (short int *a) +{ + short int mask = 1 << 7; + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0; +} + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */ +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c new file mode 100644 index 00000000000..c4dab8135c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c @@ -0,0 +1,107 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -g" } */ + +int cnt; + +__attribute__((noinline, noclone)) void +bar (void) +{ + cnt++; +} + +#include "pr102566-1a.c" + +int a; +long int b; +unsigned long int c; +unsigned short int d; + +int +main () +{ + __atomic_store_n (&a, 15, __ATOMIC_RELAXED); + if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15 + || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31) + __builtin_abort (); + if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31 + || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63) + __builtin_abort (); + __atomic_store_n (&b, 24, __ATOMIC_RELAXED); + if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28 + || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28) + __builtin_abort (); + __atomic_store_n (&a, 0, __ATOMIC_RELAXED); + if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128 + || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128) + __builtin_abort (); + if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 + || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320) + __builtin_abort (); + if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321 + || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (cnt != 0 + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 + || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129 + || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320 + || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321) + __builtin_abort (); + if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) + __builtin_abort (); + if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 + || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 + || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + __atomic_store_n (&a, 8321, __ATOMIC_RELAXED); + if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193 + || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193) + __builtin_abort (); + if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1 + || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0 + || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 + || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128 + || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0) + __builtin_abort (); + __atomic_store_n (&a, 128, __ATOMIC_RELAXED); + if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144 + || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144) + __builtin_abort (); + __atomic_store_n (&c, 1, __ATOMIC_RELAXED); + if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129 + || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129 + || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1) + __builtin_abort (); + if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128 + || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128) + __builtin_abort (); + __atomic_store_n (&d, 1, __ATOMIC_RELAXED); + if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129 + || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129 + || cnt != 2) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c new file mode 100644 index 00000000000..d1c30315353 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include <stdatomic.h> +#include <stdbool.h> + +bool +foo (_Atomic int *v) +{ + return atomic_fetch_or_explicit (v, 1, memory_order_relaxed) & 1; +} + +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */ +/* { dg-final { scan-assembler-not "cmpxchg" } } */ diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index 70ce6a4d5b8..a3f7b7f233e 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -3279,10 +3279,115 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs) || !single_imm_use (lhs, &use_p, &use_stmt) || !is_gimple_assign (use_stmt) - || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR || !gimple_vdef (call)) return; + mask = gimple_call_arg (call, 1); + tree_code rhs_code = gimple_assign_rhs_code (use_stmt); + if (rhs_code != BIT_AND_EXPR) + { + if (rhs_code != NOP_EXPR) + return; + + tree nop_lhs = gimple_assign_lhs (use_stmt); + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nop_lhs)) + return; + + tree nop_rhs = gimple_assign_rhs1 (use_stmt); + + gimple *g; + gimple_stmt_iterator gsi; + tree var; + + if (TREE_CODE (TREE_TYPE (nop_lhs)) == BOOLEAN_TYPE) + { + /* Convert + _1 = atomic bit op; + _4 = (_Bool) _1; + to + _1 = atomic bit op; + _5 = _1 & 1; + _4 = (_Bool) _5; + */ + var = make_ssa_name (TREE_TYPE (nop_rhs)); + replace_uses_by (nop_rhs, var); + g = gimple_build_assign (var, BIT_AND_EXPR, nop_rhs, + build_int_cst (TREE_TYPE (lhs), 1)); + gsi = gsi_for_stmt (use_stmt); + gsi_insert_before (&gsi, g, GSI_NEW_STMT); + use_stmt = g; + } + else if (TYPE_PRECISION (TREE_TYPE (nop_lhs)) + == TYPE_PRECISION (TREE_TYPE (nop_rhs))) + { + gimple *use_nop_stmt; + if (!single_imm_use (nop_lhs, &use_p, &use_nop_stmt) + || !is_gimple_assign (use_nop_stmt) + || gimple_assign_rhs_code (use_nop_stmt) != BIT_AND_EXPR) + return; + + tree op_mask = mask; + if (TREE_CODE (op_mask) == SSA_NAME) + { + g = SSA_NAME_DEF_STMT (op_mask); + if (gimple_assign_rhs_code (g) == NOP_EXPR) + { + tree mask_nop_lhs = gimple_assign_lhs (g); + + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs)) + return; + + tree mask_nop_rhs = gimple_assign_rhs1 (g); + if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs)) + != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs))) + return; + op_mask = mask_nop_rhs; + g = SSA_NAME_DEF_STMT (op_mask); + } + + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET) + { + if (!is_gimple_assign (g) + || gimple_assign_rhs_code (g) != BIT_NOT_EXPR) + return; + tree reset_mask = gimple_assign_rhs1 (g); + if (TREE_CODE (op_mask) != SSA_NAME) + return; + g = SSA_NAME_DEF_STMT (reset_mask); + } + + if (!is_gimple_assign (g) + || gimple_assign_rhs_code (g) != LSHIFT_EXPR + || !integer_onep (gimple_assign_rhs1 (g))) + return; + } + + /* Convert + _1 = atomic bit op; + _2 = (int) _1; + _5 = _2 & N; + to + _1 = atomic bit op; + _6 = _1 & N; + _5 = (int) _6; + */ + replace_uses_by (nop_lhs, lhs); + tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt); + var = make_ssa_name (TREE_TYPE (use_nop_lhs)); + gimple_assign_set_lhs (use_nop_stmt, var); + gsi = gsi_for_stmt (use_stmt); + gsi_remove (&gsi, true); + release_defs (use_stmt); + gsi_remove (gsip, true); + var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var); + gsi = gsi_for_stmt (use_nop_stmt); + g = gimple_build_assign (use_nop_lhs, var); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + use_stmt = use_nop_stmt; + mask = op_mask; + } + } + switch (fn) { case IFN_ATOMIC_BIT_TEST_AND_SET: @@ -3301,7 +3406,6 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing) return; - mask = gimple_call_arg (call, 1); tree use_lhs = gimple_assign_lhs (use_stmt); if (!use_lhs) return; @@ -3434,18 +3538,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, of the specified bit after the atomic operation (makes only sense for xor, otherwise the bit content is compile time known), we need to invert the bit. */ + tree mask_convert = mask; + gimple *g_convert = nullptr; + if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask)) + { + mask_convert = make_ssa_name (TREE_TYPE (lhs)); + tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask); + g_convert = gimple_build_assign (mask_convert, var); + } g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)), BIT_XOR_EXPR, new_lhs, use_bool ? build_int_cst (TREE_TYPE (lhs), 1) - : mask); + : mask_convert); new_lhs = gimple_assign_lhs (g); if (throws) { - gsi_insert_on_edge_immediate (e, g); + if (g_convert) + { + gsi_insert_on_edge_immediate (e, g_convert); + gsi = gsi_for_stmt (g_convert); + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + } + else + gsi_insert_on_edge_immediate (e, g); gsi = gsi_for_stmt (g); } else - gsi_insert_after (&gsi, g, GSI_NEW_STMT); + { + if (g_convert) + { + gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT); + gsi = gsi_for_stmt (g_convert); + } + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + } } if (use_bool && has_debug_uses) { -- 2.31.1