[Bug c/87139] New: 6.4 x86_64 incorrect code generation with -O3 around _addcarry_u64

cbcode at gmail dot com Wed, 29 Aug 2018 04:32:56 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87139


            Bug ID: 87139
           Summary: 6.4 x86_64 incorrect code generation with -O3 around
                    _addcarry_u64
           Product: gcc
           Version: 6.4.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: cbcode at gmail dot com
  Target Milestone: ---

x86_64 gcc-6 (tested with 6.4.0 and 6.3.0 under Linux, and 6.4.0 under
Windows/MinGW) generates incorrect code when optimizing with -O3 in certain
situations around _addcarry_u64. gcc-6 with -O2 is fine. gcc-5, gcc-7 and gcc-8
with -O3 are all fine. It is difficult to pinpoint the exact problem, but the
following test program should narrow it down enough to be helpful.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <x86intrin.h>

//Make the size and fixed-width integer types usable unqualified below.
using std::size_t;
using std::uint64_t;
//The code below uses size_t as its limb type and mixes it with 64-bit
//operations, so it only builds where size_t is exactly as wide as uint64_t.
static_assert(sizeof(size_t) == sizeof(uint64_t), "");

//big-integer multiplication, limb-size is size_t is uint64_t
//
//Multiplies the Na-limb integer aa by the Nb-limb integer bb and returns the
//(Na + Nb)-limb product. Limbs are stored least-significant first: the 128-bit
//partial product aa[na] * bb[nb] is accumulated into cc starting at index
//na + nb, and any carry is propagated towards higher indices.
//
//This function is the reproducer of the report; the three preprocessor
//variants below differ only in how the carry is propagated and are kept
//exactly as reported.
//
//NOTE(review): both reinterpret_casts access size_t storage through lvalues of
//another type (__uint128_t, unsigned long long). Where size_t is not
//unsigned long long (presumably unsigned long on LP64 Linux) this may violate
//the strict-aliasing rules -- worth re-testing with -fno-strict-aliasing.
//The casts are intentionally left unchanged so the reproducer stays intact.
template<size_t Na, size_t Nb>
std::array<size_t, Na + Nb> big_mul(std::array<size_t, Na> const& aa,
                                    std::array<size_t, Nb> const& bb) noexcept {
    static constexpr size_t const Nc = Na + Nb;
    std::array<size_t, Nc> cc = {{0,}};
    for (size_t na = 0; na != Na; ++na) {
        for (size_t nb = 0; nb != Nb; ++nb) {
            //128-bit partial product written over two limbs; lohi[0] is used as
            //the low half and lohi[1] as the high half (little-endian layout).
            //Note: placement-new instead of reinterpret_cast makes no difference
            alignas(__uint128_t) size_t lohi[2];
            reinterpret_cast<__uint128_t&>(lohi) = __uint128_t(aa[na]) * __uint128_t(bb[nb]);
            size_t n = na + nb;
            //add the low half; c receives the carry-out (0 or 1)
            unsigned char c = __builtin_add_overflow(cc[n], lohi[0], &cc[n]);
#if 1 //Note: x86_64-gcc-6 -O3 generates bad code; gcc-6 -O2 is fine; gcc-5 and gcc-7 and above are fine
            //add the high half plus carry-in, then ripple the carry through
            //all remaining limbs unconditionally
            c = _addcarry_u64(c, cc[n + 1], lohi[1], &reinterpret_cast<unsigned long long&>(cc[n + 1]));
            for (n += 2; n <= Nc - 1; ++n) c = __builtin_add_overflow(cc[n], c, &cc[n]);
#elif 1 //Note: added 'c &&' in for-loop condition: problem goes away
            c = _addcarry_u64(c, cc[n + 1], lohi[1], &reinterpret_cast<unsigned long long&>(cc[n + 1]));
            for (n += 2; c && n <= Nc - 1; ++n) c = __builtin_add_overflow(cc[n], c, &cc[n]);
#else //Note: inserted '++n' between add_overflow and add_carry: problem goes away
            ++n;
            c = _addcarry_u64(c, cc[n], lohi[1], &reinterpret_cast<unsigned long long&>(cc[n]));
            for (++n; c && n <= Nc - 1; ++n) c = __builtin_add_overflow(cc[n], c, &cc[n]);
#endif
        }
    }
    return cc;
}
//simple random-number-generator, https://github.com/svaarala/duktape/blob/master/misc/splitmix64.c
//
//Generator state; every call to rnd() advances it by a fixed increment.
static uint64_t rnd_state = 1;
//Returns the next 64-bit value of the sequence and updates rnd_state.
//The sequence is fully determined by the value of rnd_state, so runs are
//reproducible.
static inline uint64_t rnd() {
    //advance the state, then scramble the new state with two
    //xor-shift/multiply rounds and a final xor-shift
    uint64_t z = rnd_state += 0x9e3779b97f4a7c15;
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
    z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
    return z ^ (z >> 31);
}
//main() writes python3-script; pipe into python3; no output means no error
//g++-6 -std=c++11 -O3 -o test test.cpp && ./test 100 | python3 #bad
//g++-6 -std=c++11 -O2 -o test test.cpp && ./test 100 | python3 #Ok
//g++-5 -std=c++11 -O3 -o test test.cpp && ./test 100 | python3 #Ok
//g++-7 -std=c++11 -O3 -o test test.cpp && ./test 100 | python3 #Ok
//g++-8 -std=c++11 -O3 -o test test.cpp && ./test 100 | python3 #Ok
//
//Usage: test <iterations>
//For each iteration two random operands are multiplied with big_mul() and a
//pair of python3 lines is printed: one assigning a, b and c as hex literals,
//one that prints a diagnostic if python's own a*b differs from c.
//Returns 0 on success, 1 if the iteration count argument is missing.
int main(int argc, char const** argv) {
    //explicit check instead of assert(): an assert disappears under NDEBUG and
    //would let a null argv[1] reach strtoull
    if (argc != 2) {
        std::fprintf(stderr, "usage: %s <iterations>\n", argc > 0 ? argv[0] : "test");
        return 1;
    }
    //base 0: accepts decimal, 0x-prefixed hex and 0-prefixed octal counts
    size_t const M = size_t(std::strtoull(argv[1], nullptr, 0));
    static constexpr size_t const Na = 2, Nb = 2; //Note: any combination of Na>=2, Nb>=2 causes the problem
    std::array<size_t, Na     > aa;
    std::array<size_t, Nb     > bb;
    std::array<size_t, Na + Nb> cc;
    for (size_t m = 0; m != M; ++m) {
        for (size_t& a: aa) a = rnd();
        for (size_t& b: bb) b = rnd();
        cc = big_mul(aa, bb);
        //limbs are printed from the highest index down, so each number reads
        //as one big-endian hex literal
        std::printf(  "a=0x");
        for (size_t n = Na;    n--;) std::printf("%016zx", aa[n]);
        std::printf("; b=0x");
        for (size_t n = Nb;    n--;) std::printf("%016zx", bb[n]);
        std::printf("; c=0x");
        for (size_t n = Na+Nb; n--;) std::printf("%016zx", cc[n]);
        std::printf("\nif a*b!=c: print(hex(a),'*', hex(b),'\\n=',hex(a*b),'\\n?',hex(c))\n");
    }
    return 0;
}

[Bug c/87139] New: 6.4 x86_64 incorrect code generation with -O3 around _addcarry_u64

Reply via email to