Open
Description
Clang generates a single return block, and SROA inserts the code that merges the struct fields into the return value there. That tail is never duplicated into the predecessor blocks, which causes suboptimal codegen. (For a slightly simpler case without the ternary in the final return, InstCombine will optimize to the "good" code on x86-64, but not on AArch64.)
// Two-byte aggregate (two char fields); used as the by-value return type of s() below.
struct X2 { char a, b; };
// Reproducer. Returns data[num] when num < 3 (the branch marked [[likely]]);
// otherwise returns {8, 7} when num == 12 and {0, 0} for every other value.
// NOTE: keep the source form exactly as written -- the description above states
// that a variant without the ternary in the final return is optimized differently.
X2 s(unsigned num) {
if (num < 3) [[likely]] {
// Three-entry lookup table; entry i is {i + 1, i + 1}.
static constexpr X2 data[] = {{1, 1}, {2, 2}, {3, 3}};
return data[num];
}
// Unlikely path: one comparison selects between two constant X2 values.
return num == 12 ? X2{8, 7} : X2{0, 0};
}
Clang:
# Clang output for X2 s(unsigned num), x86-64, Intel syntax.
# In:  edi = num (the value compared against 2 and 12 below)
# Out: eax = a | (b << 8), i.e. field a in bits 0-7 and field b in bits 8-15
# Both paths rebuild the result from two separate byte values (shift + or).
s(unsigned int):
# Unsigned compare: num > 2 takes the table-miss path.
cmp edi, 2
ja .LBB0_2
# Table hit. The 32-bit mov zero-extends num into rcx for use as an index.
mov ecx, edi
lea rdx, [rip + s(unsigned int)::data]
# Each entry is 2 bytes: a at offset 0, b at offset 1, loaded separately.
movzx eax, byte ptr [rdx + 2*rcx]
movzx ecx, byte ptr [rdx + 2*rcx + 1]
# Merge the two fields: eax = a | (b << 8).
shl ecx, 8
# eax already holds a zero-extended byte here, so this movzx changes nothing.
movzx eax, ax
or eax, ecx
ret
.LBB0_2:
# Table miss: result is {8, 7} if num == 12, else {0, 0}.
xor edx, edx
xor eax, eax
cmp edi, 12
# eax = (num == 12) ? 1 : 0
sete al
# ecx = (num == 12) ? 1792 : 0, where 1792 = 7 << 8 (field b in bits 8-15).
mov ecx, 1792
cmovne ecx, edx
# eax = (num == 12) ? 8 : 0 (field a).
shl eax, 3
# eax is at most 8 here, so this movzx changes nothing.
movzx eax, ax
or eax, ecx
ret
GCC:
# GCC output for the same function, x86-64, Intel syntax.
# In:  edi = num (the value compared against 2 and 12 below)
# Out: eax = a | (b << 8), produced without any per-field shift/or merging
s(unsigned int):
# Unsigned compare: num > 2 takes the table-miss path.
cmp edi, 2
ja .L2
# The 32-bit self-move zero-extends num into rdi for use as an index.
mov edi, edi
# One 16-bit load at data + 2*num fetches both fields of data[num] at once.
movzx eax, WORD PTR s(unsigned int)::data[rdi+rdi]
ret
.L2:
# Table miss: eax = (num == 12) ? 1800 : 0.
# 1800 = (7 << 8) | 8, i.e. X2{8, 7} already merged into a single constant.
xor eax, eax
mov edx, 1800
cmp edi, 12
cmove eax, edx
ret