@@ -8,8 +8,9 @@ L_dyn_call_begin:
8
8
# At this point, the following registers are bound:
9
9
#
10
10
# rdi <- callee
11
- # rsi <- argv
12
- # rdx <- argc
11
+ # rsi <- process
12
+ # rdx <- argv
13
+ # rcx <- argc
13
14
#
14
15
# Save the parent base pointer for when control returns to this call frame.
15
16
# CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,15 @@ L_dyn_call_begin:
20
21
mov rbp, rsp
21
22
.cfi_def_cfa_register rbp
22
23
23
- # Save our callee and argv pointers, and argc
24
+ # Pin the inputs: r10 <- callee, r11 <- process, rdi <- argv, rax <- argc
24
25
mov r10, rdi
25
26
mov r11, rsi
26
- mov rax, rdx
27
+ mov rdi, rdx
28
+ mov rax, rcx
27
29
28
- # Determine if spills are needed
30
+ # Determine if spills are needed: only argc <= 5 fits in registers (jump table has entries 0-5)
29
31
# In the common case in which they are not, we perform a tail call
30
- cmp rdx, 7
32
+ cmp rcx, 5
31
33
ja L_dyn_call_spill
32
34
33
35
L_dyn_call_no_spill:
@@ -38,62 +40,52 @@ L_dyn_call_no_spill:
38
40
39
41
# Calculate offset in jump table to block which handles the specific
40
42
# number of registers we have arguments for, then jump to that block
41
- lea rcx , [rip + L_dyn_call_jt]
42
- mov rax, [rcx + rax * 4 ]
43
- add rax, rcx
44
- jmp [ rax]
43
+ lea rdx , [rip + L_dyn_call_jt]
44
+ movsxd rax, dword ptr [rdx + 4*rax ]
45
+ add rax, rdx
46
+ jmp rax
45
47
46
48
# All of these basic blocks perform a tail call. As such,
47
49
# the unwinder will skip over this frame should the callee
48
50
# throw an exception
49
51
L_dyn_call_regs0:
+ # No argv entries to load. rdi still holds the argv pointer here, so
+ # replace it with the process pointer (pinned in r11) before the tail call.
+ mov rdi , r11
50
52
pop rbp
51
- jmp [ r10]
53
+ jmp r10
52
54
53
55
L_dyn_call_regs1:
+ # One argv entry: argv[0] -> rsi. rdi is the argv base until the load is done.
54
- mov rdi , [r11 ]
56
+ mov rsi , [rdi ]
+ mov rdi , r11 # argv consumed; rdi <- process pointer (callee's first argument)
55
57
pop rbp
56
- jmp [ r10]
58
+ jmp r10
57
59
58
60
L_dyn_call_regs2:
+ # Two argv entries: argv[0..1] -> rsi, rdx. rdi is the argv base until the loads are done.
59
- mov rdi , [r11 ]
60
- mov rsi , [r11 + 8]
61
+ mov rsi , [rdi ]
62
+ mov rdx , [rdi + 8]
+ mov rdi , r11 # argv consumed; rdi <- process pointer (callee's first argument)
61
63
pop rbp
62
- jmp [ r10]
64
+ jmp r10
63
65
64
66
L_dyn_call_regs3:
+ # Three argv entries: argv[0..2] -> rsi, rdx, rcx. rdi is the argv base until the loads are done.
65
- mov rdi , [r11 ]
66
- mov rsi , [r11 + 8]
67
- mov rdx , [r11 + 16]
67
+ mov rsi , [rdi ]
68
+ mov rdx , [rdi + 8]
69
+ mov rcx , [rdi + 16]
+ mov rdi , r11 # argv consumed; rdi <- process pointer (callee's first argument)
68
70
pop rbp
69
- jmp [ r10]
71
+ jmp r10
70
72
71
73
L_dyn_call_regs4:
+ # Four argv entries: argv[0..3] -> rsi, rdx, rcx, r8. rdi is the argv base until the loads are done.
72
- mov rdi , [r11 ]
73
- mov rsi , [r11 + 8]
74
- mov rdx , [r11 + 16]
75
- mov rcx , [r11 + 24]
74
+ mov rsi , [rdi ]
75
+ mov rdx , [rdi + 8]
76
+ mov rcx , [rdi + 16]
77
+ mov r8 , [rdi + 24]
+ mov rdi , r11 # argv consumed; rdi <- process pointer (callee's first argument)
76
78
pop rbp
77
- jmp [ r10]
79
+ jmp r10
78
80
79
81
L_dyn_call_regs5:
+ # Five argv entries: argv[0..4] -> rsi, rdx, rcx, r8, r9. rdi is the argv base until the loads are done.
80
- mov rdi , [r11 ]
81
- mov rsi , [r11 + 8]
82
- mov rdx , [r11 + 16]
83
- mov rcx , [r11 + 24]
84
- mov r8 , [r11 + 32]
82
+ mov rsi , [rdi ]
83
+ mov rdx , [rdi + 8]
84
+ mov rcx , [rdi + 16]
85
+ mov r8 , [rdi + 24]
86
+ mov r9 , [rdi + 32]
+ mov rdi , r11 # argv consumed; rdi <- process pointer (callee's first argument)
85
87
pop rbp
86
- jmp [r10]
87
-
88
- L_dyn_call_regs6:
89
- mov rdi, [r11]
90
- mov rsi, [r11 + 8]
91
- mov rdx, [r11 + 16]
92
- mov rcx, [r11 + 24]
93
- mov r8, [r11 + 32]
94
- mov r9, [r11 + 40]
95
- pop rbp
96
- jmp [r10]
88
+ jmp r10
97
89
98
90
L_dyn_call_spill:
99
91
# If we hit this block, we have identified that there are
@@ -102,7 +94,7 @@ L_dyn_call_spill:
102
94
103
95
# Calculate spill count for later (rep uses rcx for the iteration count,
104
96
# which in this case is the number of quadwords to copy)
105
- mov rcx, rdx
97
+ mov r8, rcx
106
- sub rcx, 6
98
+ sub rcx, 5 # rdi is reserved for process, so only argv[0..4] travel in registers
107
99
108
100
# Calculate spill space, and ensure it is rounded up to the nearest 16 bytes.
@@ -113,21 +105,24 @@ L_dyn_call_spill:
113
105
sub rsp, rax
114
106
115
107
# load source pointer (last item of argv)
116
- lea rsi, [r11 + rdx * 8 - 8]
108
+ lea rsi, [rdi + r8 * 8 - 8]
109
+ mov r9, rdi # keep argv: rdi is about to become the copy destination
117
110
# load destination pointer (top of spill region)
118
- lea rdi, [rsp + rcx * 8 - 8]
111
+ lea rdi, [rsp + rcx * 8 - 8]
119
112
# copy rcx quadwords from rsi to rdi, in reverse
120
113
std
121
114
rep movsq
122
115
cld
123
116
124
- # We've spilled arguments, so we have at least 6 args
117
+ # We've spilled arguments, so all five register slots after rdi are filled
118
+ # rdi <- process (callee's first argument), r11 <- argv (stashed in r9 above)
125
- mov rdi, [r11]
126
- mov rsi, [r11 + 8]
119
+ mov rdi, r11
120
+ mov r11, r9
121
+ mov rsi, [r11]
122
+ mov rdx, [r11 + 8]
127
- mov rdx, [r11 + 16]
123
+ mov rcx, [r11 + 16]
128
- mov rcx, [r11 + 24]
129
- mov r8, [r11 + 32]
130
- mov r9, [r11 + 40]
124
+ mov r8, [r11 + 24]
125
+ mov r9, [r11 + 32]
131
126
132
127
L_dyn_call_exec:
133
128
# If we spill arguments to the stack, we can't perform
@@ -141,7 +136,7 @@ L_dyn_call_exec:
141
136
# This instruction will push the return address and jump,
142
137
# and we can expect rbp to be the same as we left it upon
143
138
# return.
144
- call [ r10]
139
+ call r10
145
140
146
141
L_dyn_call_ret:
147
142
# Non-tail call completed successfully
@@ -156,21 +151,19 @@ L_dyn_call_end:
156
151
# a variable number of register-based arguments
157
152
.p2align 2
158
153
.data_region jt32
# Each entry is a 32-bit offset from L_dyn_call_jt to the block that handles
# that argc; the dispatch code sign-extends the entry and adds the table address.
159
- .set L_dyn_call_jt_entry0, L_dyn_call_exec -L_dyn_call_jt
154
+ .set L_dyn_call_jt_entry0, L_dyn_call_regs0 -L_dyn_call_jt
160
155
.set L_dyn_call_jt_entry1, L_dyn_call_regs1-L_dyn_call_jt
161
156
.set L_dyn_call_jt_entry2, L_dyn_call_regs2-L_dyn_call_jt
162
157
.set L_dyn_call_jt_entry3, L_dyn_call_regs3-L_dyn_call_jt
163
158
.set L_dyn_call_jt_entry4, L_dyn_call_regs4-L_dyn_call_jt
164
159
.set L_dyn_call_jt_entry5, L_dyn_call_regs5-L_dyn_call_jt
165
- .set L_dyn_call_jt_entry6, L_dyn_call_regs6-L_dyn_call_jt
166
160
L_dyn_call_jt:
167
161
.long L_dyn_call_jt_entry0
168
162
.long L_dyn_call_jt_entry1
169
163
.long L_dyn_call_jt_entry2
170
164
.long L_dyn_call_jt_entry3
171
165
.long L_dyn_call_jt_entry4
172
166
.long L_dyn_call_jt_entry5
173
- .long L_dyn_call_jt_entry6
174
167
.end_data_region
175
168
176
169
# The following is the LSDA metadata for exception handling
0 commit comments