Skip to content

Commit 10edabb

Browse files
authored
[X86][GlobalISel] Enable G_SDIV/G_UDIV/G_SREM/G_UREM (llvm#81615)
* Create a libcall for the s64 type on 32-bit targets.
* Fix a bug in REM selection: SUBREG_TO_REG is not intended to extract a value from a super-register; a COPY of the sub_8bit subregister is used instead.
* Replace selector tests with end-to-end tests. Other passes check the selected MIR better.
1 parent 1c01651 commit 10edabb

26 files changed

+1044
-3388
lines changed

llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp

+3-6
Original file line number | Diff line number | Diff line change
@@ -1778,12 +1778,9 @@ bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
17781778
.addImm(8);
17791779

17801780
// Now reference the 8-bit subreg of the result.
1781-
BuildMI(*I.getParent(), I, I.getDebugLoc(),
1782-
TII.get(TargetOpcode::SUBREG_TO_REG))
1783-
.addDef(DstReg)
1784-
.addImm(0)
1785-
.addReg(ResultSuperReg)
1786-
.addImm(X86::sub_8bit);
1781+
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
1782+
DstReg)
1783+
.addReg(ResultSuperReg, 0, X86::sub_8bit);
17871784
} else {
17881785
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
17891786
DstReg)

llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp

+1
Original file line number | Diff line number | Diff line change
@@ -213,6 +213,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
213213
return typeInSet(0, {s8, s16, s32})(Query) ||
214214
(Is64Bit && typeInSet(0, {s64})(Query));
215215
})
216+
.libcallFor({s64})
216217
.clampScalar(0, s8, sMaxScalar);
217218

218219
// integer shifts
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,127 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64
3+
# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86
4+
5+
...
6+
---
7+
name: test_sdiv_i8
8+
tracksRegLiveness: true
9+
body: |
10+
bb.1:
11+
liveins: $edi, $esi
12+
13+
; CHECK-LABEL: name: test_sdiv_i8
14+
; CHECK: liveins: $edi, $esi
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
17+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
18+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
19+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
20+
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s8) = G_SDIV [[TRUNC]], [[TRUNC1]]
21+
; CHECK-NEXT: $al = COPY [[SDIV]](s8)
22+
; CHECK-NEXT: RET 0, implicit $al
23+
%2:_(s32) = COPY $edi
24+
%0:_(s8) = G_TRUNC %2(s32)
25+
%3:_(s32) = COPY $esi
26+
%1:_(s8) = G_TRUNC %3(s32)
27+
%4:_(s8) = G_SDIV %0, %1
28+
$al = COPY %4(s8)
29+
RET 0, implicit $al
30+
31+
...
32+
---
33+
name: test_sdiv_i16
34+
tracksRegLiveness: true
35+
body: |
36+
bb.1:
37+
liveins: $edi, $esi
38+
39+
; CHECK-LABEL: name: test_sdiv_i16
40+
; CHECK: liveins: $edi, $esi
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
43+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
44+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
45+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
46+
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[TRUNC]], [[TRUNC1]]
47+
; CHECK-NEXT: $ax = COPY [[SDIV]](s16)
48+
; CHECK-NEXT: RET 0, implicit $ax
49+
%2:_(s32) = COPY $edi
50+
%0:_(s16) = G_TRUNC %2(s32)
51+
%3:_(s32) = COPY $esi
52+
%1:_(s16) = G_TRUNC %3(s32)
53+
%4:_(s16) = G_SDIV %0, %1
54+
$ax = COPY %4(s16)
55+
RET 0, implicit $ax
56+
57+
...
58+
---
59+
name: test_sdiv_i32
60+
tracksRegLiveness: true
61+
body: |
62+
bb.1:
63+
liveins: $edi, $esi
64+
65+
; CHECK-LABEL: name: test_sdiv_i32
66+
; CHECK: liveins: $edi, $esi
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
69+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
70+
; CHECK-NEXT: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[COPY]], [[COPY1]]
71+
; CHECK-NEXT: $eax = COPY [[SDIV]](s32)
72+
; CHECK-NEXT: RET 0, implicit $eax
73+
%0:_(s32) = COPY $edi
74+
%1:_(s32) = COPY $esi
75+
%2:_(s32) = G_SDIV %0, %1
76+
$eax = COPY %2(s32)
77+
RET 0, implicit $eax
78+
79+
...
80+
---
81+
name: test_sdiv_i64
82+
tracksRegLiveness: true
83+
body: |
84+
bb.1:
85+
; X64-LABEL: name: test_sdiv_i64
86+
; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
87+
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
88+
; X64-NEXT: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[DEF]], [[DEF1]]
89+
; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64)
90+
; X64-NEXT: RET 0, implicit [[COPY]](s64)
91+
;
92+
; X86-LABEL: name: test_sdiv_i64
93+
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
94+
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
95+
; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
96+
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
97+
; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp
98+
; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
99+
; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
100+
; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
101+
; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
102+
; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
103+
; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
104+
; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
105+
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
106+
; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
107+
; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
108+
; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32)
109+
; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1)
110+
; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp
111+
; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
112+
; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32)
113+
; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1)
114+
; X86-NEXT: CALLpcrel32 &__divdi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx
115+
; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax
116+
; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx
117+
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
118+
; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
119+
; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
120+
; X86-NEXT: RET 0, implicit [[COPY6]](s64)
121+
%0:_(s64) = IMPLICIT_DEF
122+
%1:_(s64) = IMPLICIT_DEF
123+
%2:_(s64) = G_SDIV %0, %1
124+
%3:_(s64) = COPY %2(s64)
125+
RET 0, implicit %3
126+
127+
...
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,127 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X64
3+
# RUN: llc -mtriple=i686-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,X86
4+
5+
...
6+
---
7+
name: test_srem_i8
8+
tracksRegLiveness: true
9+
body: |
10+
bb.1:
11+
liveins: $edi, $esi
12+
13+
; CHECK-LABEL: name: test_srem_i8
14+
; CHECK: liveins: $edi, $esi
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
17+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
18+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
19+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
20+
; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s8) = G_SREM [[TRUNC]], [[TRUNC1]]
21+
; CHECK-NEXT: $al = COPY [[SREM]](s8)
22+
; CHECK-NEXT: RET 0, implicit $al
23+
%2:_(s32) = COPY $edi
24+
%0:_(s8) = G_TRUNC %2(s32)
25+
%3:_(s32) = COPY $esi
26+
%1:_(s8) = G_TRUNC %3(s32)
27+
%4:_(s8) = G_SREM %0, %1
28+
$al = COPY %4(s8)
29+
RET 0, implicit $al
30+
31+
...
32+
---
33+
name: test_srem_i16
34+
tracksRegLiveness: true
35+
body: |
36+
bb.1:
37+
liveins: $edi, $esi
38+
39+
; CHECK-LABEL: name: test_srem_i16
40+
; CHECK: liveins: $edi, $esi
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
43+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
44+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
45+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
46+
; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[TRUNC]], [[TRUNC1]]
47+
; CHECK-NEXT: $ax = COPY [[SREM]](s16)
48+
; CHECK-NEXT: RET 0, implicit $ax
49+
%2:_(s32) = COPY $edi
50+
%0:_(s16) = G_TRUNC %2(s32)
51+
%3:_(s32) = COPY $esi
52+
%1:_(s16) = G_TRUNC %3(s32)
53+
%4:_(s16) = G_SREM %0, %1
54+
$ax = COPY %4(s16)
55+
RET 0, implicit $ax
56+
57+
...
58+
---
59+
name: test_srem_i32
60+
tracksRegLiveness: true
61+
body: |
62+
bb.1:
63+
liveins: $edi, $esi
64+
65+
; CHECK-LABEL: name: test_srem_i32
66+
; CHECK: liveins: $edi, $esi
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $edi
69+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
70+
; CHECK-NEXT: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[COPY]], [[COPY1]]
71+
; CHECK-NEXT: $eax = COPY [[SREM]](s32)
72+
; CHECK-NEXT: RET 0, implicit $eax
73+
%0:_(s32) = COPY $edi
74+
%1:_(s32) = COPY $esi
75+
%2:_(s32) = G_SREM %0, %1
76+
$eax = COPY %2(s32)
77+
RET 0, implicit $eax
78+
79+
...
80+
---
81+
name: test_srem_i64
82+
tracksRegLiveness: true
83+
body: |
84+
bb.1:
85+
; X64-LABEL: name: test_srem_i64
86+
; X64: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
87+
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
88+
; X64-NEXT: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[DEF]], [[DEF1]]
89+
; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[SREM]](s64)
90+
; X64-NEXT: RET 0, implicit [[COPY]](s64)
91+
;
92+
; X86-LABEL: name: test_srem_i64
93+
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
94+
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
95+
; X86-NEXT: ADJCALLSTACKDOWN32 16, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
96+
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
97+
; X86-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $esp
98+
; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
99+
; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
100+
; X86-NEXT: G_STORE [[UV]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
101+
; X86-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
102+
; X86-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
103+
; X86-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
104+
; X86-NEXT: G_STORE [[UV1]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
105+
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
106+
; X86-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
107+
; X86-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
108+
; X86-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32)
109+
; X86-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1)
110+
; X86-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $esp
111+
; X86-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
112+
; X86-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32)
113+
; X86-NEXT: G_STORE [[UV3]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1)
114+
; X86-NEXT: CALLpcrel32 &__moddi3, csr_32, implicit $esp, implicit $ssp, implicit-def $eax, implicit-def $edx
115+
; X86-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $eax
116+
; X86-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $edx
117+
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
118+
; X86-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
119+
; X86-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
120+
; X86-NEXT: RET 0, implicit [[COPY6]](s64)
121+
%0:_(s64) = IMPLICIT_DEF
122+
%1:_(s64) = IMPLICIT_DEF
123+
%2:_(s64) = G_SREM %0, %1
124+
%3:_(s64) = COPY %2(s64)
125+
RET 0, implicit %3
126+
127+
...

0 commit comments

Comments
 (0)