http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54802



             Bug #: 54802

           Summary: Trivial code changes result in different assembly with

                    respect to rotations and bswap.

    Classification: Unclassified

           Product: gcc

           Version: 4.8.0

            Status: UNCONFIRMED

          Severity: enhancement

          Priority: P3

         Component: c

        AssignedTo: unassig...@gcc.gnu.org

        ReportedBy: jasongross9+bugzi...@gmail.com





Created attachment 28347

  --> http://gcc.gnu.org/bugzilla/attachment.cgi?id=28347

Code files



In some C code, manually inlining constants changes whether gcc compiles

things to rotations or to bswaps.  In particular, the following code



/*
 * Reverse the bit order of a 64-bit value by swapping progressively larger
 * groups: single bits, 2-bit pairs, nibbles, bytes, 16-bit words and finally
 * the two 32-bit halves.
 *
 * This variant performs the last (32-bit) swap through the named const
 * temporaries va and vb instead of one combined expression.
 */
uint64_t reverse0(uint64_t v) {

  /* Swap adjacent bits. */
  v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1);

  /* Swap adjacent 2-bit pairs. */
  v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);

  /* Swap the two nibbles of every byte. */
  v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);

  /* Swap adjacent bytes. */
  v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);

  /* Swap adjacent 16-bit words. */
  v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) <<

16);

  /* Swap the 32-bit halves: va is the old high half moved down, vb the old
     low half moved up. */
  const uint64_t

      va = ((v >> 32) & 0x00000000FFFFFFFFULL),

      vb = ((v & 0x00000000FFFFFFFFULL) << 32);

  v = va | vb;

  return v;

}



/*
 * Reverse the bit order of a 64-bit value by swapping progressively larger
 * groups: single bits, 2-bit pairs, nibbles, bytes, 16-bit words and finally
 * the two 32-bit halves.
 *
 * This variant writes the last (32-bit) swap as a single expression, with no
 * named temporaries.
 */
uint64_t reverse1(uint64_t v) {

  /* Swap adjacent bits. */
  v = ((v >> 1) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1);

  /* Swap adjacent 2-bit pairs. */
  v = ((v >> 2) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2);

  /* Swap the two nibbles of every byte. */
  v = ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4);

  /* Swap adjacent bytes. */
  v = ((v >> 8) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8);

  /* Swap adjacent 16-bit words. */
  v = ((v >> 16) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) <<

16);

  /* Swap the two 32-bit halves. */
  v = ((v >> 32) & 0x00000000FFFFFFFFULL) | ((v & 0x00000000FFFFFFFFULL) <<

32);

  return v;

}



compiles to the following assembly (x86-64, AT&T syntax):



# Compiler output for reverse0: explicit 1-, 2- and 4-bit swap sequences
# followed by a single bswap.
reverse0:

.LFB8:

    .cfi_startproc

    movq    %rdi, %rdx

    # Mask 0x5555555555555555 (decimal 6148914691236517205) for the 1-bit swap.
    movabsq    $6148914691236517205, %rax

    # Mask 0x3333333333333333 (decimal 3689348814741910323) for the 2-bit swap.
    movabsq    $3689348814741910323, %rcx

    shrq    %rdx

    andq    %rax, %rdx

    andq    %rdi, %rax

    # Adding %rax to itself is the left shift by one.
    addq    %rax, %rax

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $2, %rdx

    salq    $2, %rax

    andq    %rcx, %rdx

    # Mask 0x0F0F0F0F0F0F0F0F (decimal 1085102592571150095) for the 4-bit swap.
    movabsq    $1085102592571150095, %rcx

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $4, %rdx

    salq    $4, %rax

    andq    %rcx, %rdx

    orq    %rdx, %rax

    # Byte-order reversal of %rax; no separate 8-, 16- or 32-bit swap
    # sequences appear in this listing.
    bswap    %rax

    ret

    .cfi_endproc

.LFE8:

    .size    reverse0, .-reverse0

    .p2align 4,,15

    .globl    reverse1

    .type    reverse1, @function

# Compiler output for reverse1: explicit 1-, 2-, 4-, 8- and 16-bit swap
# sequences followed by a 32-bit rotate.
reverse1:

.LFB9:

    .cfi_startproc

    movq    %rdi, %rdx

    # Mask 0x5555555555555555 (decimal 6148914691236517205) for the 1-bit swap.
    movabsq    $6148914691236517205, %rax

    # Mask 0x3333333333333333 (decimal 3689348814741910323) for the 2-bit swap.
    movabsq    $3689348814741910323, %rcx

    shrq    %rdx

    andq    %rax, %rdx

    andq    %rdi, %rax

    # Adding %rax to itself is the left shift by one.
    addq    %rax, %rax

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $2, %rdx

    salq    $2, %rax

    andq    %rcx, %rdx

    # Mask 0x0F0F0F0F0F0F0F0F (decimal 1085102592571150095) for the 4-bit swap.
    movabsq    $1085102592571150095, %rcx

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $4, %rdx

    salq    $4, %rax

    andq    %rcx, %rdx

    # Mask 0x00FF00FF00FF00FF (decimal 71777214294589695) for the 8-bit swap.
    movabsq    $71777214294589695, %rcx

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $8, %rdx

    salq    $8, %rax

    andq    %rcx, %rdx

    # Mask 0x0000FFFF0000FFFF (decimal 281470681808895) for the 16-bit swap.
    movabsq    $281470681808895, %rcx

    orq    %rdx, %rax

    movq    %rax, %rdx

    andq    %rcx, %rax

    shrq    $16, %rdx

    salq    $16, %rax

    andq    %rcx, %rdx

    orq    %rdx, %rax

    # Rotating the 64-bit register by 32 exchanges its two 32-bit halves.
    rorq    $32, %rax

    ret

    .cfi_endproc

.LFE9:

    .size    reverse1, .-reverse1

    .p2align 4,,15

    .globl    reverse2

    .type    reverse2, @function





In the code that I'm using this in, reverse0 is 30% faster than reverse1.  I

don't think that manually inlining constants, when each constant is used exactly

once, should change the assembly code that gcc generates.



The relevant (.c, .i, .s, and a log of the command line) files are attached.

Reply via email to