Skip to content

Commit 72901fe

Browse files
authored
[AArch64] Fold UBFMXri to UBFMWri when it's an LSR or LSL alias (#106968)
Using the LSR or LSL aliases of UBFM can be faster on some CPUs, so it is worth changing 64-bit UBFM instructions that are equivalent to 32-bit LSR/LSL operations into their 32-bit variants. This change folds the following patterns:
* If `Imms == 31` and `Immr <= Imms`: `UBFMXri %0, Immr, Imms` -> `UBFMWri %0.sub_32, Immr, Imms`
* If `Immr == Imms + 33`: `UBFMXri %0, Immr, Imms` -> `UBFMWri %0.sub_32, Immr - 32, Imms`
1 parent 20c5432 commit 72901fe

13 files changed

+115
-26
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,17 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
13211321
.add(StMO)
13221322
.addImm(AndMaskEncoded)
13231323
.setMIFlags(LoadI->getFlags());
1324+
} else if (IsStoreXReg && Imms == 31) {
1325+
// Use the 32 bit variant of UBFM if it's the LSR alias of the
1326+
// instruction.
1327+
assert(Immr <= Imms && "Expected LSR alias of UBFM");
1328+
BitExtMI = BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
1329+
TII->get(AArch64::UBFMWri),
1330+
TRI->getSubReg(DestReg, AArch64::sub_32))
1331+
.addReg(TRI->getSubReg(StRt, AArch64::sub_32))
1332+
.addImm(Immr)
1333+
.addImm(Imms)
1334+
.setMIFlags(LoadI->getFlags());
13241335
} else {
13251336
BitExtMI =
13261337
BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
// 8. Remove redundant CSELs that select between identical registers, by
6565
// replacing them with unconditional moves.
6666
//
67+
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit
68+
// LSR or LSL alias of UBFM.
69+
//
6770
//===----------------------------------------------------------------------===//
6871

6972
#include "AArch64ExpandImm.h"
@@ -132,6 +135,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
132135
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
133136
bool visitINSvi64lane(MachineInstr &MI);
134137
bool visitFMOVDr(MachineInstr &MI);
138+
bool visitUBFMXri(MachineInstr &MI);
135139
bool visitCopy(MachineInstr &MI);
136140
bool runOnMachineFunction(MachineFunction &MF) override;
137141

@@ -715,6 +719,57 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
715719
return true;
716720
}
717721

722+
bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Rewrite a 64 bit UBFM as the 32 bit UBFMWri when its immediates make it
  // equivalent to the 32 bit LSR or LSL alias of UBFM:
  //   * LSR form: Imms == 31 and Immr <= Imms; both immediates are reused.
  //   * LSL form: Immr == Imms + 33; Immr is rebased by 32 for UBFMWri.
  // Returns true if MI was replaced (and erased), false if it was left alone.
  const int64_t Imms = MI.getOperand(3).getImm();
  int64_t Immr = MI.getOperand(2).getImm();

  const bool MatchesLSL = Immr == Imms + 33;
  const bool MatchesLSR = Imms == 31 && Immr <= Imms;
  if (!(MatchesLSR || MatchesLSL))
    return false;

  if (MatchesLSL)
    Immr -= 32;

  // Fetch the sub_32 register class corresponding to the register class that
  // the UBFMXri instruction description gives for operand OpIdx.
  auto SubRC32For = [&](unsigned OpIdx) {
    const TargetRegisterClass *RC64 =
        TII->getRegClass(TII->get(MI.getOpcode()), OpIdx, TRI, *MI.getMF());
    return TRI->getSubRegisterClass(RC64, AArch64::sub_32);
  };

  const TargetRegisterClass *DstRC32 = SubRC32For(0);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC32 = SubRC32For(1);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  // The 32 bit destination register is created before the 32 bit source
  // register, so new virtual registers are allocated in a fixed order.
  const Register WideDst = MI.getOperand(0).getReg();
  const Register NarrowDst = MRI->createVirtualRegister(DstRC32);
  const Register WideSrc = MI.getOperand(1).getReg();
  const Register NarrowSrc = MRI->createVirtualRegister(SrcRC32);

  auto &MBB = *MI.getParent();
  const auto &DL = MI.getDebugLoc();

  // NarrowSrc = COPY WideSrc.sub_32
  BuildMI(MBB, MI, DL, TII->get(AArch64::COPY), NarrowSrc)
      .addReg(WideSrc, 0, AArch64::sub_32);

  // NarrowDst = UBFMWri NarrowSrc, Immr, Imms
  BuildMI(MBB, MI, DL, TII->get(AArch64::UBFMWri), NarrowDst)
      .addReg(NarrowSrc)
      .addImm(Immr)
      .addImm(Imms);

  // WideDst = SUBREG_TO_REG 0, NarrowDst, sub_32
  BuildMI(MBB, MI, DL, TII->get(AArch64::SUBREG_TO_REG), WideDst)
      .addImm(0)
      .addReg(NarrowDst)
      .addImm(AArch64::sub_32);

  MI.eraseFromParent();
  return true;
}
772+
718773
// Across a basic-block we might have in i32 extract from a value that only
719774
// operates on upper bits (for example a sxtw). We can replace the COPY with a
720775
// new version skipping the sxtw.
@@ -865,6 +920,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
865920
case AArch64::FMOVDr:
866921
Changed |= visitFMOVDr(MI);
867922
break;
923+
case AArch64::UBFMXri:
924+
Changed |= visitUBFMXri(MI);
925+
break;
868926
case AArch64::COPY:
869927
Changed |= visitCopy(MI);
870928
break;

llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ body: |
3636
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
3737
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 273, 12
3838
; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 3549, 0
39-
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[ADDXri1]], 28, 31
40-
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
39+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[ADDXri1]].sub_32
40+
; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 28, 31
41+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UBFMWri]], %subreg.sub_32
42+
; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
4143
; CHECK-NEXT: RET_ReallyLR implicit $x0
4244
%0:gpr64 = COPY $x0
4345
%1:gpr32 = MOVi32imm 1121757
@@ -58,8 +60,10 @@ body: |
5860
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
5961
; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64sp = SUBXri [[COPY]], 273, 12
6062
; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri [[SUBXri]], 3549, 0
61-
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBXri1]], 28, 31
62-
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
63+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[SUBXri1]].sub_32
64+
; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 28, 31
65+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UBFMWri]], %subreg.sub_32
66+
; CHECK-NEXT: $x0 = COPY [[SUBREG_TO_REG]]
6367
; CHECK-NEXT: RET_ReallyLR implicit $x0
6468
%0:gpr64 = COPY $x0
6569
%1:gpr64 = MOVi64imm -1121757

llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -810,7 +810,7 @@ define i32 @fct19(i64 %arg1) nounwind readonly ssp {
810810
; LLC-NEXT: add w0, w8, #16
811811
; LLC-NEXT: ret
812812
; LLC-NEXT: .LBB26_4: // %if.end13
813-
; LLC-NEXT: ubfx x8, x0, #16, #16
813+
; LLC-NEXT: lsr w8, w0, #16
814814
; LLC-NEXT: cbz w8, .LBB26_6
815815
; LLC-NEXT: // %bb.5: // %if.then17
816816
; LLC-NEXT: adrp x9, first_ones

llvm/test/CodeGen/AArch64/arm64-ld-from-st.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ define i16 @Str64Ldr16_1(ptr nocapture %P, i64 %v, i64 %n) {
6464
; CHECK-LABEL: Str64Ldr16_1:
6565
; CHECK: // %bb.0: // %entry
6666
; CHECK-NEXT: str x1, [x0, #8]
67-
; CHECK-NEXT: ubfx x0, x1, #16, #16
67+
; CHECK-NEXT: lsr w0, w1, #16
6868
; CHECK-NEXT: ret
6969
entry:
7070
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 1
@@ -149,7 +149,7 @@ define i8 @Str64Ldr8_3(ptr nocapture %P, i64 %v, i64 %n) {
149149
; CHECK-LABEL: Str64Ldr8_3:
150150
; CHECK: // %bb.0: // %entry
151151
; CHECK-NEXT: str x1, [x0, #8]
152-
; CHECK-NEXT: ubfx x0, x1, #24, #8
152+
; CHECK-NEXT: lsr w0, w1, #24
153153
; CHECK-NEXT: ret
154154
entry:
155155
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 1
@@ -424,7 +424,7 @@ define i16 @Unscaled_Str64Ldr16_1(ptr nocapture %P, i64 %v, i64 %n) {
424424
; CHECK-LABEL: Unscaled_Str64Ldr16_1:
425425
; CHECK: // %bb.0: // %entry
426426
; CHECK-NEXT: stur x1, [x0, #-8]
427-
; CHECK-NEXT: ubfx x0, x1, #16, #16
427+
; CHECK-NEXT: lsr w0, w1, #16
428428
; CHECK-NEXT: ret
429429
entry:
430430
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 -1
@@ -509,7 +509,7 @@ define i8 @Unscaled_Str64Ldr8_3(ptr nocapture %P, i64 %v, i64 %n) {
509509
; CHECK-LABEL: Unscaled_Str64Ldr8_3:
510510
; CHECK: // %bb.0: // %entry
511511
; CHECK-NEXT: stur x1, [x0, #-8]
512-
; CHECK-NEXT: ubfx x0, x1, #24, #8
512+
; CHECK-NEXT: lsr w0, w1, #24
513513
; CHECK-NEXT: ret
514514
entry:
515515
%arrayidx0 = getelementptr inbounds i64, ptr %P, i64 -1

llvm/test/CodeGen/AArch64/arm64_32.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ declare i64 @get_int()
677677

678678
define i1 @test_icmp_ptr(ptr %in) {
679679
; CHECK-LABEL: test_icmp_ptr
680-
; CHECK: ubfx x0, x0, #31, #1
680+
; CHECK: lsr w0, w0, #31
681681
%res = icmp slt ptr %in, null
682682
ret i1 %res
683683
}

llvm/test/CodeGen/AArch64/bitfield-extract.ll

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ declare void @use(i16 signext, i64)
9999

100100
; CHECK-LABEL: test_complex_node:
101101
; CHECK: ldr d0, [x0], #8
102-
; CHECK: ubfx x[[VAL:[0-9]+]], x0, #5, #27
102+
; CHECK: lsr w[[VAL:[0-9]+]], w0, #5
103103
; CHECK: str w[[VAL]], [x2]
104104
define <2 x i32> @test_complex_node(ptr %addr, ptr %addr2, ptr %bf ) {
105105
%vec = load <2 x i32>, ptr %addr
@@ -113,3 +113,11 @@ define <2 x i32> @test_complex_node(ptr %addr, ptr %addr2, ptr %bf ) {
113113

114114
ret <2 x i32> %vec
115115
}
116+
117+
; CHECK-LABEL: @test12
118+
; CHECK: lsr w0, w0, #10
119+
define i32 @test12(i64 %a) {
120+
%tmp = trunc i64 %a to i32
121+
%res = lshr i32 %tmp, 10
122+
ret i32 %res
123+
}

llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
5252
define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
5353
; CHECK-LABEL: load_unscaled_zext_i32_to_i64
5454
; CHECK: ldur w[[REG:[0-9]+]], [x0, #-8]
55-
; CHECK: ubfx x0, x[[REG]], #0, #32
55+
; CHECK: lsr w0, w[[REG]], #0
5656
%1 = sub i64 %a, 8
5757
%2 = inttoptr i64 %1 to ptr addrspace(256)
5858
%3 = load i32, ptr addrspace(256) %2

llvm/test/CodeGen/AArch64/fast-isel-shift.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -681,7 +681,7 @@ define i64 @shl_zext_zero(i32 %a) {
681681
; CHECK-LABEL: shl_zext_zero:
682682
; CHECK: ; %bb.0:
683683
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
684-
; CHECK-NEXT: ubfx x0, x0, #0, #32
684+
; CHECK-NEXT: lsr w0, w0, #0
685685
; CHECK-NEXT: ret
686686
%1 = zext i32 %a to i64
687687
%2 = shl i64 %1, 0
@@ -692,7 +692,7 @@ define i64 @lshr_zext_zero(i32 %a) {
692692
; CHECK-LABEL: lshr_zext_zero:
693693
; CHECK: ; %bb.0:
694694
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
695-
; CHECK-NEXT: ubfx x0, x0, #0, #32
695+
; CHECK-NEXT: lsr w0, w0, #0
696696
; CHECK-NEXT: ret
697697
%1 = zext i32 %a to i64
698698
%2 = lshr i64 %1, 0
@@ -703,7 +703,7 @@ define i64 @ashr_zext_zero(i32 %a) {
703703
; CHECK-LABEL: ashr_zext_zero:
704704
; CHECK: ; %bb.0:
705705
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
706-
; CHECK-NEXT: ubfx x0, x0, #0, #32
706+
; CHECK-NEXT: lsr w0, w0, #0
707707
; CHECK-NEXT: ret
708708
%1 = zext i32 %a to i64
709709
%2 = ashr i64 %1, 0

llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define i64 @csed_impdef_killflag(i64 %a) {
1212
; CHECK-NEXT: mov x9, #2 ; =0x2
1313
; CHECK-NEXT: csel w8, wzr, w8, ne
1414
; CHECK-NEXT: mov x10, #3 ; =0x3
15-
; CHECK-NEXT: ubfx x8, x8, #0, #32
15+
; CHECK-NEXT: lsr w8, w8, #0
1616
; CHECK-NEXT: csel x9, x9, x10, ne
1717
; CHECK-NEXT: add x0, x9, x8
1818
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/trunc-to-tbl.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
575575
; CHECK-NEXT: ldrb w14, [x0, #18]
576576
; CHECK-NEXT: ldrh w15, [x0, #16]
577577
; CHECK-NEXT: add x0, x0, #32
578-
; CHECK-NEXT: ubfx x12, x10, #12, #20
578+
; CHECK-NEXT: lsr w12, w10, #12
579579
; CHECK-NEXT: fmov s1, w9
580580
; CHECK-NEXT: lsr x11, x9, #19
581581
; CHECK-NEXT: lsr x13, x10, #31
@@ -586,7 +586,7 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
586586
; CHECK-NEXT: orr x11, x15, x14, lsl #16
587587
; CHECK-NEXT: mov.s v0[1], w13
588588
; CHECK-NEXT: extr x13, x11, x10, #50
589-
; CHECK-NEXT: ubfx x10, x11, #5, #27
589+
; CHECK-NEXT: lsr w10, w11, #5
590590
; CHECK-NEXT: mov.s v1[2], w12
591591
; CHECK-NEXT: mov.s v0[2], w13
592592
; CHECK-NEXT: mov.s v1[3], w9
@@ -616,14 +616,14 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
616616
; CHECK-BE-NEXT: lsr x15, x10, #40
617617
; CHECK-BE-NEXT: extr x12, x12, x11, #57
618618
; CHECK-BE-NEXT: fmov s0, w13
619-
; CHECK-BE-NEXT: ubfx x13, x10, #7, #25
619+
; CHECK-BE-NEXT: lsr w13, w10, #7
620620
; CHECK-BE-NEXT: extr x14, x15, x14, #50
621-
; CHECK-BE-NEXT: ubfx x15, x9, #14, #18
621+
; CHECK-BE-NEXT: lsr w15, w9, #14
622622
; CHECK-BE-NEXT: extr x9, x10, x9, #40
623623
; CHECK-BE-NEXT: fmov s1, w12
624624
; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
625625
; CHECK-BE-NEXT: mov v0.s[1], w14
626-
; CHECK-BE-NEXT: ubfx x9, x9, #12, #20
626+
; CHECK-BE-NEXT: lsr w9, w9, #12
627627
; CHECK-BE-NEXT: orr w11, w12, w11
628628
; CHECK-BE-NEXT: mov v1.s[1], w15
629629
; CHECK-BE-NEXT: lsr w11, w11, #19
@@ -657,14 +657,14 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
657657
; CHECK-DISABLE-NEXT: lsr x15, x10, #40
658658
; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
659659
; CHECK-DISABLE-NEXT: fmov s0, w13
660-
; CHECK-DISABLE-NEXT: ubfx x13, x10, #7, #25
660+
; CHECK-DISABLE-NEXT: lsr w13, w10, #7
661661
; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
662-
; CHECK-DISABLE-NEXT: ubfx x15, x9, #14, #18
662+
; CHECK-DISABLE-NEXT: lsr w15, w9, #14
663663
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
664664
; CHECK-DISABLE-NEXT: fmov s1, w12
665665
; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
666666
; CHECK-DISABLE-NEXT: mov v0.s[1], w14
667-
; CHECK-DISABLE-NEXT: ubfx x9, x9, #12, #20
667+
; CHECK-DISABLE-NEXT: lsr w9, w9, #12
668668
; CHECK-DISABLE-NEXT: orr w11, w12, w11
669669
; CHECK-DISABLE-NEXT: mov v1.s[1], w15
670670
; CHECK-DISABLE-NEXT: lsr w11, w11, #19

llvm/test/CodeGen/AArch64/xbfiz.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,11 @@ define i32 @noubfiz32(i32 %v) {
6161
%add = add i32 %shl, %and
6262
ret i32 %add
6363
}
64+
65+
define i64 @lsl32_not_ubfiz64(i64 %v) {
66+
; CHECK-LABEL: lsl32_not_ubfiz64:
67+
; CHECK: lsl w0, w0, #6
68+
%shl = shl i64 %v, 6
69+
%and = and i64 %shl, 4294967295
70+
ret i64 %and
71+
}

llvm/test/CodeGen/AArch64/zext-to-tbl.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
12591259
; CHECK-NEXT: mov.b v1[5], w10
12601260
; CHECK-NEXT: ubfx w10, w9, #24, #4
12611261
; CHECK-NEXT: mov.b v1[6], w10
1262-
; CHECK-NEXT: ubfx x10, x9, #28, #4
1262+
; CHECK-NEXT: lsr w10, w9, #28
12631263
; CHECK-NEXT: mov.b v1[7], w10
12641264
; CHECK-NEXT: ubfx x10, x9, #32, #4
12651265
; CHECK-NEXT: mov.b v1[8], w10
@@ -1322,7 +1322,7 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
13221322
; CHECK-BE-NEXT: mov v1.b[6], w10
13231323
; CHECK-BE-NEXT: ubfx x10, x9, #32, #4
13241324
; CHECK-BE-NEXT: mov v1.b[7], w10
1325-
; CHECK-BE-NEXT: ubfx x10, x9, #28, #4
1325+
; CHECK-BE-NEXT: lsr w10, w9, #28
13261326
; CHECK-BE-NEXT: mov v1.b[8], w10
13271327
; CHECK-BE-NEXT: ubfx w10, w9, #24, #4
13281328
; CHECK-BE-NEXT: mov v1.b[9], w10

0 commit comments

Comments
 (0)