https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102566

--- Comment #30 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by hongtao Liu <liuho...@gcc.gnu.org>:

https://gcc.gnu.org/g:fb161782545224f55ba26ba663889c5e6e9a04d1

commit r12-5102-gfb161782545224f55ba26ba663889c5e6e9a04d1
Author: liuhongt <hongtao....@intel.com>
Date:   Mon Oct 25 13:59:51 2021 +0800

    Improve integer bit test on __atomic_fetch_[or|and]_* returns

    commit adedd5c173388ae505470df152b9cb3947339566
    Author: Jakub Jelinek <ja...@redhat.com>
    Date:   Tue May 3 13:37:25 2016 +0200

        re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

    optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
    with lock bts/btr/btc by turning

      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
      _5 = _4 & mask_2;

    into

      _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
      _5 = _4;

    and

      mask_6 = 1 << bit_5(D);
      _1 = ~mask_6;
      _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
      _3 = _2 & mask_6;
      _4 = _3 != 0;

    into

      mask_6 = 1 << bit_5(D);
      _1 = ~mask_6;
      _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
      _4 = _11 != 0;

    But it failed to optimize many equivalent, but slightly different cases:

    1.
      _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
      _4 = (_Bool) _1;
    2.
      _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
      _4 = (_Bool) _1;
    3.
      _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
      _7 = ~_1;
      _5 = (_Bool) _7;
    4.
      _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
      _7 = ~_1;
      _5 = (_Bool) _7;
    5.
      _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
      _2 = (int) _1;
      _7 = ~_2;
      _5 = (_Bool) _7;
    6.
      _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
      _2 = (int) _1;
      _7 = ~_2;
      _5 = (_Bool) _7;
    7.
      _1 = __atomic_fetch_or_4 (ptr_6, 0x80000000, _3);
      _5 = (signed int) _1;
      _4 = _5 < 0;
    8.
      _1 = __atomic_fetch_and_4 (ptr_6, 0x7fffffff, _3);
      _5 = (signed int) _1;
      _4 = _5 < 0;
    9.
      _1 = 1 << bit_4(D);
      mask_5 = (unsigned int) _1;
      _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
      _3 = _2 & mask_5;
    10.
      mask_7 = 1 << bit_6(D);
      _1 = ~mask_7;
      _2 = (unsigned int) _1;
      _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
      _4 = (int) _3;
      _5 = _4 & mask_7;

    We make

      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
      _5 = _4 & mask_2;

    and

      mask_6 = 1 << bit_5(D);
      _1 = ~mask_6;
      _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
      _3 = _2 & mask_6;
      _4 = _3 != 0;

    the canonical forms for this optimization and transform cases 1-8 to the
    equivalent canonical form.  For cases 9 and 10, we simply remove the cast
    before __atomic_fetch_or_4/__atomic_fetch_and_4 with

      _1 = 1 << bit_4(D);
      _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
      _3 = _2 & _1;

    and

      mask_7 = 1 << bit_6(D);
      _1 = ~mask_7;
      _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
      _6 = _3 & mask_7;
      _5 = (int) _6;

    2021-11-04  H.J. Lu  <hongjiu...@intel.com>
                Hongtao Liu  <hongtao....@intel.com>
    gcc/

            PR middle-end/102566
            * match.pd (nop_atomic_bit_test_and_p): New match.
            * tree-ssa-ccp.c (convert_atomic_bit_not): New function.
            (gimple_nop_atomic_bit_test_and_p): New prototype.
            (optimize_atomic_bit_test_and): Transform equivalent, but slightly
            different cases to their canonical forms.

    gcc/testsuite/

            PR middle-end/102566
            * g++.target/i386/pr102566-1.C: New test.
            * g++.target/i386/pr102566-2.C: Likewise.
            * g++.target/i386/pr102566-3.C: Likewise.
            * g++.target/i386/pr102566-4.C: Likewise.
            * g++.target/i386/pr102566-5a.C: Likewise.
            * g++.target/i386/pr102566-5b.C: Likewise.
            * g++.target/i386/pr102566-6a.C: Likewise.
            * g++.target/i386/pr102566-6b.C: Likewise.
            * gcc.target/i386/pr102566-1a.c: Likewise.
            * gcc.target/i386/pr102566-1b.c: Likewise.
            * gcc.target/i386/pr102566-2.c: Likewise.
            * gcc.target/i386/pr102566-3a.c: Likewise.
            * gcc.target/i386/pr102566-3b.c: Likewise.
            * gcc.target/i386/pr102566-4.c: Likewise.
            * gcc.target/i386/pr102566-5.c: Likewise.
            * gcc.target/i386/pr102566-6.c: Likewise.
            * gcc.target/i386/pr102566-7.c: Likewise.
            * gcc.target/i386/pr102566-8a.c: Likewise.
            * gcc.target/i386/pr102566-8b.c: Likewise.
            * gcc.target/i386/pr102566-9a.c: Likewise.
            * gcc.target/i386/pr102566-9b.c: Likewise.
            * gcc.target/i386/pr102566-10a.c: Likewise.
            * gcc.target/i386/pr102566-10b.c: Likewise.
            * gcc.target/i386/pr102566-11.c: Likewise.
            * gcc.target/i386/pr102566-12.c: Likewise.
            * gcc.target/i386/pr102566-13.c: New test.
            * gcc.target/i386/pr102566-14.c: New test.

Reply via email to