https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96172

            Bug ID: 96172
           Summary: Failure to optimize direct assignment to bitfield
                    through shifts
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gabravier at gmail dot com
  Target Milestone: ---

#include <cstdint>

struct ret_struct
{
    union
    {
        struct
        {
            unsigned int a : 1;
            unsigned int b : 1;
            unsigned int c : 1;
        };
        unsigned char as_char;
    };
};

ret_struct f1(uint32_t x)
{
    x >>= 16;
    ret_struct result;
    result.a = x;
    result.b = (x >> 1);
    result.c = (x >> 2);
    return result;
}
// Compiling f1 with GCC yields code equivalent to f2
ret_struct f2(uint32_t x)
{
    uint32_t a = (x >> 17) & 1;
    uint32_t d = x;
    a += a;
    x = ((x >> 18) & 1) << 2;

    ret_struct result;
    result.as_char = ((a | ((d >> 16) & 1)) | x);
    return result;
}
// Compiling f2 with GCC yields code equivalent to f3
ret_struct f3(uint32_t x)
{
    x >>= 16;
    ret_struct result;
    result.as_char = (x & 1) | ((x & 2) | (x & 4));
    return result;
}
// Compiling f3 with GCC yields code equivalent to f4
ret_struct f4(uint32_t x)
{
    ret_struct result;
    result.as_char = (x >> 16) & 7;
    return result;
}

f1 and f2 can be directly optimized to f4. That transformation is done by LLVM,
but not by GCC.
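
For reference, a quick sanity check (not part of the reduced testcase, just a
sketch assuming the definitions of f1 and f4 above) that the two functions
agree on all three bitfields for every combination of bits 16-18:

#include <cassert>

int main()
{
    // Exhaustively check the three bits that f1 and f4 actually look at.
    for (uint32_t bits = 0; bits < 8; ++bits)
    {
        uint32_t x = bits << 16;
        ret_struct r1 = f1(x);
        ret_struct r4 = f4(x);
        // Reading a, b, c after f4 wrote as_char relies on union-based
        // type punning, which GCC supports as an extension.
        assert(r1.a == r4.a && r1.b == r4.b && r1.c == r4.c);
    }
    return 0;
}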

Additionally, even compiling f4 directly with GCC doesn't yield optimal code on
architectures like x86. GCC generates this:

f4(unsigned int):
  shr edi, 16
  and edi, 7
  xor eax, eax
  mov al, dil
  ret

and LLVM generates this:

f4(unsigned int):
  mov eax, edi
  shr eax, 16
  and eax, 7
  ret
