[AArch64] Consider COPY between disjoint register classes as expensive

guy-david · guy-david · commit 258e4cf37838 · 2025-11-10T17:11:42.000+02:00
The motivation is to allow passes such as MachineLICM to hoist trivial
FMOV instructions out of loops; previously they were not hoisted, even
when the source operand was a constant.
On most microarchitectures, these cross-class moves have a latency of
2-6 cycles, so they are certainly not as cheap as a 0-1 cycle move.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1043,6 +1043,27 @@ static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
   return Is.size() <= 2;
 }
 
+// Return true if the COPY MI is as cheap as a plain register move. A copy
+// whose source and destination register classes share no common subclass
+// (e.g. GPR <-> FPR) typically lowers to an FMOV with 2-6 cycles of latency
+// and is reported as expensive; registers without a class are not compared.
+static bool isCheapCopy(const MachineInstr &MI,
+                        const AArch64RegisterInfo &RI) {
+  assert(MI.isCopy() && "Expected COPY instruction");
+  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+  // getRegClassOrNull (unlike getRegClass) tolerates vregs with no class yet.
+  auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
+    if (Reg.isVirtual())
+      return MRI.getRegClassOrNull(Reg);
+    return Reg.isPhysical() ? RI.getMinimalPhysRegClass(Reg) : nullptr;
+  };
+  const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
+  const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
+  if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
+    return false;
+  return MI.isAsCheapAsAMove();
+}
+
 // FIXME: this implementation should be micro-architecture dependent, so a
 // micro-architecture target hook should be introduced here in future.
 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
@@ -1056,6 +1077,9 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
   default:
     return MI.isAsCheapAsAMove();
 
+  case TargetOpcode::COPY:
+    return isCheapCopy(MI, RI);
+
   case AArch64::ADDWrs:
   case AArch64::ADDXrs:
   case AArch64::SUBWrs:
diff --git a/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir b/llvm/test/CodeGen/AArch64/licm-regclass-copy.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=early-machinelicm -verify-machineinstrs -o - %s | FileCheck %s
+
+# This test verifies that cross-register-class copies (e.g., between GPR and FPR)
+# ARE hoisted out of loops by MachineLICM, as they translate to expensive
+# instructions like FMOV (2-6 cycles) on AArch64.
+
+---
+name: cross_regclass_copy_hoisted
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: fpr64 }
+body: |
+  ; CHECK-LABEL: name: cross_regclass_copy_hoisted
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %0:gpr64 = COPY $x0
+  ; CHECK-NEXT:   %2:fpr64 = COPY $d0
+  ; CHECK-NEXT:   %1:gpr64 = COPY %2
+  ; CHECK-NEXT:   B %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   %0:gpr64 = ADDXri %0, 1, 0
+  ; CHECK-NEXT:   $xzr = SUBSXri %0, 100, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 11, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x0 = COPY %1
+  ; CHECK-NEXT:   RET_ReallyLR implicit $x0
+  bb.0:
+    liveins: $x0, $d0
+    %0:gpr64 = COPY $x0
+    %2:fpr64 = COPY $d0
+    B %bb.1
+
+  bb.1:
+    ; This COPY between FPR64 and GPR64 should be hoisted
+    %1:gpr64 = COPY %2:fpr64
+    %0:gpr64 = ADDXri %0:gpr64, 1, 0
+    $xzr = SUBSXri %0:gpr64, 100, 0, implicit-def $nzcv
+    Bcc 11, %bb.1, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    $x0 = COPY %1:gpr64
+    RET_ReallyLR implicit $x0
+...