Skip to content

Commit 258e4cf

Browse files
committed
[AArch64] Consider COPY between disjoint register classes as expensive
The motivation is to allow passes such as MachineLICM to hoist trivial FMOV instructions out of loops; previously they did not do so, even when the RHS is a constant. On most architectures, these expensive move instructions have a latency of 2-6 cycles and are certainly not as cheap as a 0-1 cycle move.
1 parent 94a7006 commit 258e4cf

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,6 +1043,27 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
10431043
return Is.size() <= 2;
10441044
}
10451045

1046+
// Check if a COPY instruction is cheap.
1047+
static bool isCheapCopy(const MachineInstr &MI,
1048+
const AArch64RegisterInfo &RI) {
1049+
assert(MI.isCopy() && "Expected COPY instruction");
1050+
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1051+
1052+
// Cross-register-class copies (e.g., between GPR and FPR) are expensive on
1053+
// AArch64, typically requiring an FMOV instruction with a 2-6 cycle latency.
1054+
auto getRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1055+
return Reg.isVirtual() ? MRI.getRegClass(Reg)
1056+
: Reg.isPhysical() ? RI.getMinimalPhysRegClass(Reg)
1057+
: nullptr;
1058+
};
1059+
const TargetRegisterClass *DstRC = getRegClass(MI.getOperand(0).getReg());
1060+
const TargetRegisterClass *SrcRC = getRegClass(MI.getOperand(1).getReg());
1061+
if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1062+
return false;
1063+
1064+
return MI.isAsCheapAsAMove();
1065+
}
1066+
10461067
// FIXME: this implementation should be micro-architecture dependent, so a
10471068
// micro-architecture target hook should be introduced here in future.
10481069
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
@@ -1056,6 +1077,9 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
10561077
default:
10571078
return MI.isAsCheapAsAMove();
10581079

1080+
case TargetOpcode::COPY:
1081+
return isCheapCopy(MI, RI);
1082+
10591083
case AArch64::ADDWrs:
10601084
case AArch64::ADDXrs:
10611085
case AArch64::SUBWrs:
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
# This test verifies that cross-register-class copies (e.g., between GPR and FPR)
5+
# ARE hoisted out of loops by MachineLICM, as they translate to expensive
6+
# instructions like FMOV (2-6 cycles) on AArch64.
7+
8+
---
9+
name: cross_regclass_copy_hoisted
10+
tracksRegLiveness: true
11+
registers:
12+
- { id: 0, class: gpr64 }
13+
- { id: 1, class: gpr64 }
14+
- { id: 2, class: fpr64 }
15+
body: |
16+
; CHECK-LABEL: name: cross_regclass_copy_hoisted
17+
; CHECK: bb.0:
18+
; CHECK-NEXT: successors: %bb.1(0x80000000)
19+
; CHECK-NEXT: liveins: $x0, $d0
20+
; CHECK-NEXT: {{ $}}
21+
; CHECK-NEXT: %0:gpr64 = COPY $x0
22+
; CHECK-NEXT: %2:fpr64 = COPY $d0
23+
; CHECK-NEXT: %1:gpr64 = COPY %2
24+
; CHECK-NEXT: B %bb.1
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: bb.1:
27+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: %0:gpr64 = ADDXri %0, 1, 0
30+
; CHECK-NEXT: $xzr = SUBSXri %0, 100, 0, implicit-def $nzcv
31+
; CHECK-NEXT: Bcc 11, %bb.1, implicit $nzcv
32+
; CHECK-NEXT: B %bb.2
33+
; CHECK-NEXT: {{ $}}
34+
; CHECK-NEXT: bb.2:
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: $x0 = COPY %1
37+
; CHECK-NEXT: RET_ReallyLR implicit $x0
38+
bb.0:
39+
liveins: $x0, $d0
40+
%0:gpr64 = COPY $x0
41+
%2:fpr64 = COPY $d0
42+
B %bb.1
43+
44+
bb.1:
45+
; This loop-invariant COPY from FPR64 to GPR64 should be hoisted out of the loop into bb.0
46+
%1:gpr64 = COPY %2:fpr64
47+
%0:gpr64 = ADDXri %0:gpr64, 1, 0
48+
$xzr = SUBSXri %0:gpr64, 100, 0, implicit-def $nzcv
49+
Bcc 11, %bb.1, implicit $nzcv
50+
B %bb.2
51+
52+
bb.2:
53+
$x0 = COPY %1:gpr64
54+
RET_ReallyLR implicit $x0
55+
...

0 commit comments

Comments
 (0)