Skip to content

Bad codegen with multiple paths for small struct return #145375

Open
@aengelke

Description

@aengelke

Clang generates a single return block, and SROA inserts the code that merges the struct fields there, but the tail is never duplicated, causing suboptimal codegen. (For a slightly simpler case without the ternary at the return, InstCombine will optimize to "good" code on x86-64, but not on AArch64.)

// Aggregate of two char fields; this is the small struct returned by value below.
struct X2 { char a, b; };
// Reproducer for the codegen issue: returns data[num] when num < 3,
// {8, 7} when num == 12, and {0, 0} for every other value.
// NOTE: keep the source shape as-is; the report is about how this exact form is compiled.
X2 s(unsigned num) {
    // Path annotated as the likely one: look the result up in a three-entry constant table.
    if (num < 3) [[likely]] {
        static constexpr X2 data[] = {{1, 1}, {2, 2}, {3, 3}};
        return data[num];
    }
    // Second return path. Per the report, the simpler case without this ternary is
    // optimized to the "good" code by InstCombine on x86-64, but not on AArch64.
    return num == 12 ? X2{8, 7} : X2{0, 0};
}

Clang:

# Clang output for s(unsigned): on both paths the two bytes of the result are
# produced separately and then merged with shift/or before returning.
s(unsigned int):
        cmp     edi, 2
        # num > 2: skip the table lookup
        ja      .LBB0_2
        mov     ecx, edi
        lea     rdx, [rip + s(unsigned int)::data]
        # two separate byte loads at data + 2*num and data + 2*num + 1
        movzx   eax, byte ptr [rdx + 2*rcx]
        movzx   ecx, byte ptr [rdx + 2*rcx + 1]
        # merge: eax = first byte | (second byte << 8)
        shl     ecx, 8
        movzx   eax, ax
        or      eax, ecx
        ret
.LBB0_2:
        xor     edx, edx
        xor     eax, eax
        cmp     edi, 12
        # al = (num == 12)
        sete    al
        # 1792 = 7 << 8; ecx = (num == 12) ? 1792 : 0
        mov     ecx, 1792
        cmovne  ecx, edx
        # eax = (num == 12) ? 8 : 0, then merged with ecx: 1800 or 0
        shl     eax, 3
        movzx   eax, ax
        or      eax, ecx
        ret

GCC:

# GCC output for the same function: one 16-bit load on the table path and a
# single conditional move on the other path.
s(unsigned int):
        cmp     edi, 2
        # num > 2: skip the table lookup
        ja      .L2
        mov     edi, edi
        # single 16-bit load of the table entry at data + 2*num
        movzx   eax, WORD PTR s(unsigned int)::data[rdi+rdi]
        ret
.L2:
        xor     eax, eax
        # 1800 = 8 | (7 << 8); eax = (num == 12) ? 1800 : 0
        mov     edx, 1800
        cmp     edi, 12
        cmove   eax, edx
        ret

https://godbolt.org/z/a3Wb38Taf

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions