https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119042

            Bug ID: 119042
           Summary: Optimize more  !struct.x && !struct.y  codegen cases
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

Taken from the LLVM report: https://github.com/llvm/llvm-project/issues/128778
the cases apply to GCC as well:

#include <cstdint>

// Case 1: two ordinary (non-bit-field) bool members.
struct S1 {
    bool x;
    bool y;
};

// Case 2: two bool members declared as 1-bit bit-fields.
struct S2 {
    bool x : 1;
    bool y : 1;
};

// Case 3: two uint8_t members declared as 1-bit bit-fields.
struct S3 {
    uint8_t x : 1;
    uint8_t y : 1;
};

// Case 4: two ordinary (non-bit-field) uint8_t members.
struct S4 {
    uint8_t x ;
    uint8_t y ;
};

// Test functions, one src/tgt pair per struct above. Every function takes its
// struct by value and returns true only when both x and y are zero/false.
// srcN writes the test with logical-not (!x && !y); tgtN writes the same
// condition as explicit comparisons against 0, so each pair is expected to
// compile to equivalent code.
// extern "C" gives the functions C linkage (unmangled symbol names).
extern "C" {
// S1: plain bool members.
auto src1(S1 it) -> bool { return !it.x && !it.y; }
auto tgt1(S1 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S2: 1-bit bool bit-fields.
auto src2(S2 it) -> bool { return !it.x && !it.y; }
auto tgt2(S2 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S3: 1-bit uint8_t bit-fields.
auto src3(S3 it) -> bool { return !it.x && !it.y; }
auto tgt3(S3 it) -> bool { return (it.x == 0) && (it.y == 0); }

// S4: plain uint8_t members.
auto src4(S4 it) -> bool { return !it.x && !it.y; }
auto tgt4(S4 it) -> bool { return (it.x == 0) && (it.y == 0); }
}

On aarch64, for example, GCC with -O3 generates:
src1:
        ubfx    x1, x0, 8, 8
        tst     x0, 1
        eor     w1, w1, 1
        csel    w0, w1, wzr, eq
        ret
tgt1:
        ubfx    x1, x0, 8, 8
        tst     x0, 1
        eor     w1, w1, 1
        csel    w0, w1, wzr, eq
        ret
src2:
        ubfx    x1, x0, 1, 1
        tst     x0, 1
        eor     w1, w1, 1
        and     w1, w1, 255
        csel    w0, w1, wzr, eq
        ret
tgt2:
        ubfx    x1, x0, 0, 1
        ubfx    x0, x0, 1, 1
        orr     w0, w1, w0
        eor     w0, w0, 1
        ret
src3:
        tst     x0, 3
        cset    w0, eq
        ret
tgt3:
        tst     x0, 3
        cset    w0, eq
        ret
src4:
        ubfx    x1, x0, 8, 8
        orr     w1, w1, w0
        tst     w1, 255
        cset    w0, eq
        ret
tgt4:
        ubfx    x1, x0, 8, 8
        orr     w1, w1, w0
        tst     w1, 255
        cset    w0, eq
        ret

The ubfx and orr/eor instructions can probably be optimised away.
I'm not yet sure whether this needs to be done in the target or in the initial
GIMPLE lowering of the conditionals, so I'm marking it as tree-optimization
initially.

Reply via email to