Issue 145375
Summary Bad codegen with multiple paths for small struct return
Labels llvm:optimizations, missed-optimization
Assignees
Reporter aengelke
    Clang generates a single return block, and SROA inserts code there to merge the struct fields; however, that tail is never duplicated into the individual return paths, which results in suboptimal codegen. (For a slightly simpler case without the ternary at the return, InstCombine optimizes this to "good" code on x86-64, but not on AArch64.)

```c++
// Reproducer. X2 is a two-byte aggregate (two char fields, a then b).
struct X2 { char a, b; };
// Returns an X2 by value from two distinct source-level return statements:
// a table lookup for num in [0, 3) and a ternary for every other value.
X2 s(unsigned num) {
    // Path marked [[likely]]: index a three-entry constant table.
    if (num < 3) [[likely]] {
        static constexpr X2 data[] = {{1, 1}, {2, 2}, {3, 3}};
        return data[num];
 }
    // Remaining path: {8, 7} only when num == 12, otherwise {0, 0}.
    return num == 12 ? X2{8, 7} : X2{0, 0};
}
```
Clang:
```
# Clang output (Intel syntax). edi holds num; the X2 result is built in eax
# as a | (b << 8). Both paths assemble the two fields separately and OR them.
s(unsigned int):
        # Unsigned num > 2 means the index is outside the table: go to .LBB0_2.
        cmp     edi, 2
 ja      .LBB0_2
        # 32-bit mov zero-extends num into rcx for use as the table index.
        mov     ecx, edi
        lea     rdx, [rip + s(unsigned int)::data]
        # Two separate byte loads from the same 2-byte element:
        # offset +0 into eax, offset +1 into ecx.
        movzx   eax, byte ptr [rdx + 2*rcx]
 movzx   ecx, byte ptr [rdx + 2*rcx + 1]
        # Move the second byte up to bits 8..15.
        shl     ecx, 8
        # eax was just zero-extended from a byte, so bits 16..31 are already 0.
 movzx   eax, ax
        or      eax, ecx
        ret
.LBB0_2:
        xor edx, edx
        xor     eax, eax
        cmp     edi, 12
        # al = 1 if num == 12, else 0 (eax was zeroed above).
        sete al
        # 1792 = 7 << 8; replaced by edx (0) when num != 12.
        mov     ecx, 1792
        cmovne  ecx, edx
        # (num == 12) << 3 yields 8 or 0 for the low byte.
        shl eax, 3
        movzx   eax, ax
        or      eax, ecx
 ret
```
GCC:
```
# GCC output (Intel syntax) for the same function, shown for comparison:
# each path produces the packed 16-bit result directly in eax.
s(unsigned int):
        # Unsigned num > 2 means the index is outside the table: go to .L2.
        cmp     edi, 2
        ja .L2
        # 32-bit self-move zero-extends num into rdi.
        mov     edi, edi
        # One 16-bit load fetches both fields; rdi+rdi scales the index by 2.
        movzx   eax, WORD PTR s(unsigned int)::data[rdi+rdi]
        ret
.L2:
        xor     eax, eax
        # 1800 = 8 | (7 << 8), i.e. both fields of X2{8, 7} in one constant.
        mov edx, 1800
        cmp     edi, 12
        # Keep 0 unless num == 12, in which case take the packed constant.
        cmove   eax, edx
 ret
```
https://godbolt.org/z/a3Wb38Taf
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to