@@ -12,7 +12,7 @@ module Timings
12
12
13
13
using .. Core
14
14
using .. Compiler: - , + , :, Vector, length, first, empty!, push!, pop!, @inline ,
15
- @inbounds , copy, backtrace
15
+ @inbounds , copy, backtrace, _time_ns
16
16
17
17
# What we record for any given frame we infer during type inference.
18
18
struct InferenceFrameInfo
53
53
Timing (mi_info, start_time, cur_start_time, time, children) = Timing (mi_info, start_time, cur_start_time, time, children, nothing )
54
54
Timing (mi_info, start_time) = Timing (mi_info, start_time, start_time, UInt64 (0 ), Timing[])
55
55
56
- _time_ns () = ccall (:jl_hrtime , UInt64, ())
57
-
58
56
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
59
57
# Since type inference currently operates via a depth-first search (during abstract
60
58
# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -103,7 +101,7 @@ function result_edges(interp::AbstractInterpreter, caller::InferenceState)
103
101
end
104
102
end
105
103
106
- function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt )
104
+ function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt , time_before :: UInt64 )
107
105
result = caller. result
108
106
# @assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
109
107
if isdefined (result, :ci )
@@ -142,9 +140,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
142
140
if ! @isdefined di
143
141
di = DebugInfo (result. linfo)
144
142
end
145
- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
143
+ time_now = _time_ns ()
144
+ time_self_ns = caller. time_self_ns + (time_now - time_before)
145
+ time_total = (time_now - caller. time_start - caller. time_paused) * 1e-9
146
+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
146
147
ci, inferred_result, const_flag, first (result. valid_worlds), last (result. valid_worlds), encode_effects (result. ipo_effects),
147
- result. analysis_results, di, edges)
148
+ result. analysis_results, time_total, caller . time_caches, time_self_ns * 1e-9 , di, edges)
148
149
engine_reject (interp, ci)
149
150
codegen = codegen_cache (interp)
150
151
if ! discard_src && codegen != = nothing && uncompressed isa CodeInfo
@@ -186,8 +187,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
186
187
end
187
188
ccall (:jl_fill_codeinst , Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
188
189
ci, rettype, exctype, nothing , const_flags, min_world, max_world, ipo_effects, nothing , di, edges)
189
- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
190
- ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , di, edges)
190
+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
191
+ ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , 0.0 , 0.0 , 0.0 , di, edges)
191
192
code_cache (interp)[mi] = ci
192
193
codegen = codegen_cache (interp)
193
194
if codegen != = nothing
@@ -197,14 +198,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
197
198
return nothing
198
199
end
199
200
200
- function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState )
201
+ function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState , time_before :: UInt64 )
201
202
finishinfer! (frame, frame. interp, frame. cycleid)
202
203
opt = frame. result. src
203
204
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
204
205
optimize (frame. interp, opt, frame. result)
205
206
end
206
207
validation_world = get_world_counter ()
207
- finish! (frame. interp, frame, validation_world)
208
+ finish! (frame. interp, frame, validation_world, time_before )
208
209
if isdefined (frame. result, :ci )
209
210
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
210
211
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -219,7 +220,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
219
220
return nothing
220
221
end
221
222
222
- function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int )
223
+ function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int , time_before :: UInt64 )
223
224
cycle_valid_worlds = WorldRange ()
224
225
cycle_valid_effects = EFFECTS_TOTAL
225
226
for frameid = cycleid: length (frames)
@@ -236,23 +237,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
236
237
caller = frames[frameid]:: InferenceState
237
238
adjust_cycle_frame! (caller, cycle_valid_worlds, cycle_valid_effects)
238
239
finishinfer! (caller, caller. interp, cycleid)
240
+ time_now = _time_ns ()
241
+ caller. time_self_ns += (time_now - time_before)
242
+ time_before = time_now
239
243
end
244
+ time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
245
+ time_paused = UInt64 (0 )
240
246
for frameid = cycleid: length (frames)
241
247
caller = frames[frameid]:: InferenceState
242
248
opt = caller. result. src
243
249
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
244
250
optimize (caller. interp, opt, caller. result)
251
+ time_now = _time_ns ()
252
+ caller. time_self_ns += (time_now - time_before)
253
+ time_before = time_now
245
254
end
255
+ time_caches += caller. time_caches
256
+ time_paused += caller. time_paused
257
+ caller. time_paused = UInt64 (0 )
258
+ caller. time_caches = 0.0
246
259
end
260
+ cycletop = frames[cycleid]:: InferenceState
261
+ time_start = cycletop. time_start
247
262
validation_world = get_world_counter ()
248
263
cis = CodeInstance[]
249
264
for frameid = cycleid: length (frames)
250
265
caller = frames[frameid]:: InferenceState
251
- finish! (caller. interp, caller, validation_world)
266
+ caller. time_start = time_start
267
+ caller. time_caches = time_caches
268
+ caller. time_paused = time_paused
269
+ finish! (caller. interp, caller, validation_world, time_before)
252
270
if isdefined (caller. result, :ci )
253
271
push! (cis, caller. result. ci)
254
272
end
255
273
end
274
+ if cycletop. parentid != 0
275
+ parent = frames[cycletop. parentid]
276
+ parent. time_caches += time_caches
277
+ parent. time_paused += time_paused
278
+ end
256
279
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
257
280
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
258
281
# validity.
@@ -792,9 +815,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
792
815
rt = cached_return_type (codeinst)
793
816
exct = codeinst. exctype
794
817
effects = ipo_effects (codeinst)
795
- edge = codeinst
796
818
update_valid_age! (caller, WorldRange (min_world (codeinst), max_world (codeinst)))
797
- return Future (MethodCallResult (interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
819
+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_total)
820
+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_cache_saved)
821
+ return Future (MethodCallResult (interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
798
822
end
799
823
800
824
function MethodCallResult (:: AbstractInterpreter , sv:: AbsIntState , method:: Method ,
@@ -890,7 +914,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
890
914
if frame === false
891
915
# completely new, but check again after reserving in the engine
892
916
if cache_mode == CACHE_MODE_GLOBAL
917
+ reserve_start = _time_ns () # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
893
918
ci_from_engine = engine_reserve (interp, mi)
919
+ caller. time_paused += (_time_ns () - reserve_start)
894
920
edge_ci = ci_from_engine
895
921
codeinst = get (code_cache (interp), mi, nothing )
896
922
if codeinst isa CodeInstance # return existing rettype if the code is already inferred
0 commit comments