Skip to content

Commit 17e9f40

Browse files
Merge branch 'master' into jldoctest-fix
2 parents 15934ed + 18b5d8f commit 17e9f40

File tree

38 files changed

+447
-279
lines changed

38 files changed

+447
-279
lines changed

Compiler/src/abstractinterpretation.jl

+16-3
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,8 @@ function const_prop_call(interp::AbstractInterpreter,
13521352
end
13531353
assign_parentchild!(frame, sv)
13541354
if !typeinf(interp, frame)
1355+
sv.time_caches += frame.time_caches
1356+
sv.time_paused += frame.time_paused
13551357
add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
13561358
@assert frame.frameid != 0 && frame.cycleid == frame.frameid
13571359
callstack = frame.callstack::Vector{AbsIntState}
@@ -4357,6 +4359,7 @@ end
43574359
# make as much progress on `frame` as possible (by handling cycles)
43584360
warnlength::Int = 2500
43594361
function typeinf(interp::AbstractInterpreter, frame::InferenceState)
4362+
time_before = _time_ns()
43604363
callstack = frame.callstack::Vector{AbsIntState}
43614364
nextstates = CurrentState[]
43624365
takenext = frame.frameid
@@ -4388,24 +4391,30 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
43884391
# get_compileable_sig), but still must be finished up since it may see and
43894392
# change the local variables of the InferenceState at currpc, we do this
43904393
# even if the nextresult status is already completed.
4391-
continue
43924394
elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
43934395
# Next make progress on this frame
43944396
prev = length(callee.tasks) + 1
43954397
nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
43964398
reverse!(callee.tasks, prev)
43974399
elseif callee.cycleid == length(callstack)
43984400
# With no active ip's and no cycles, frame is done
4399-
finish_nocycle(interp, callee)
4401+
time_now = _time_ns()
4402+
callee.time_self_ns += (time_now - time_before)
4403+
time_before = time_now
4404+
finish_nocycle(interp, callee, time_before)
44004405
callee.frameid == 0 && break
44014406
takenext = length(callstack)
44024407
nextstateid = takenext + 1 - frame.frameid
44034408
#@assert length(nextstates) == nextstateid + 1
44044409
#@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
44054410
resize!(nextstates, nextstateid)
4411+
continue
44064412
elseif callee.cycleid == callee.frameid
44074413
# If the current frame is the top part of a cycle, check if the whole cycle
44084414
# is done, and if not, pick the next item to work on.
4415+
time_now = _time_ns()
4416+
callee.time_self_ns += (time_now - time_before)
4417+
time_before = time_now
44094418
no_active_ips_in_cycle = true
44104419
for i = callee.cycleid:length(callstack)
44114420
caller = callstack[i]::InferenceState
@@ -4416,7 +4425,7 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
44164425
end
44174426
end
44184427
if no_active_ips_in_cycle
4419-
finish_cycle(interp, callstack, callee.cycleid)
4428+
finish_cycle(interp, callstack, callee.cycleid, time_before)
44204429
end
44214430
takenext = length(callstack)
44224431
nextstateid = takenext + 1 - frame.frameid
@@ -4426,10 +4435,14 @@ function typeinf(interp::AbstractInterpreter, frame::InferenceState)
44264435
else
44274436
#@assert length(nextstates) == nextstateid
44284437
end
4438+
continue
44294439
else
44304440
# Continue to the next frame in this cycle
44314441
takenext = takenext - 1
44324442
end
4443+
time_now = _time_ns()
4444+
callee.time_self_ns += (time_now - time_before)
4445+
time_before = time_now
44334446
end
44344447
#@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
44354448
return is_inferred(frame)

Compiler/src/inferencestate.jl

+9-1
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,10 @@ mutable struct InferenceState
302302
bestguess #::Type
303303
exc_bestguess
304304
ipo_effects::Effects
305+
time_start::UInt64
306+
time_caches::Float64
307+
time_paused::UInt64
308+
time_self_ns::UInt64
305309

306310
#= flags =#
307311
# Whether to restrict inference of abstract call sites to avoid excessive work
@@ -392,6 +396,7 @@ mutable struct InferenceState
392396
currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, bb_saw_latestworld, ssavaluetypes, ssaflags, edges, stmt_info,
393397
tasks, pclimitations, limitations, cycle_backedges, callstack, parentid, frameid, cycleid,
394398
result, unreachable, bestguess, exc_bestguess, ipo_effects,
399+
_time_ns(), 0.0, 0, 0,
395400
restrict_abstract_call_sites, cache_mode, insert_coverage,
396401
interp)
397402

@@ -815,6 +820,8 @@ mutable struct IRInterpretationState
815820
const mi::MethodInstance
816821
world::WorldWithRange
817822
curridx::Int
823+
time_caches::Float64
824+
time_paused::UInt64
818825
const argtypes_refined::Vector{Bool}
819826
const sptypes::Vector{VarState}
820827
const tpdum::TwoPhaseDefUseMap
@@ -849,7 +856,8 @@ mutable struct IRInterpretationState
849856
tasks = WorkThunk[]
850857
edges = Any[]
851858
callstack = AbsIntState[]
852-
return new(spec_info, ir, mi, WorldWithRange(world, valid_worlds), curridx, argtypes_refined, ir.sptypes, tpdum,
859+
return new(spec_info, ir, mi, WorldWithRange(world, valid_worlds),
860+
curridx, 0.0, 0, argtypes_refined, ir.sptypes, tpdum,
853861
ssa_refined, lazyreachability, tasks, edges, callstack, 0, 0)
854862
end
855863
end

Compiler/src/typeinfer.jl

+40-14
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module Timings
1212

1313
using ..Core
1414
using ..Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
15-
@inbounds, copy, backtrace
15+
@inbounds, copy, backtrace, _time_ns
1616

1717
# What we record for any given frame we infer during type inference.
1818
struct InferenceFrameInfo
@@ -53,8 +53,6 @@ end
5353
Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
5454
Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
5555

56-
_time_ns() = ccall(:jl_hrtime, UInt64, ())
57-
5856
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
5957
# Since type inference currently operates via a depth-first search (during abstract
6058
# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -103,7 +101,7 @@ function result_edges(interp::AbstractInterpreter, caller::InferenceState)
103101
end
104102
end
105103

106-
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt)
104+
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64)
107105
result = caller.result
108106
#@assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
109107
if isdefined(result, :ci)
@@ -142,9 +140,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
142140
if !@isdefined di
143141
di = DebugInfo(result.linfo)
144142
end
145-
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
143+
time_now = _time_ns()
144+
time_self_ns = caller.time_self_ns + (time_now - time_before)
145+
time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9
146+
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
146147
ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects),
147-
result.analysis_results, di, edges)
148+
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges)
148149
engine_reject(interp, ci)
149150
codegen = codegen_cache(interp)
150151
if !discard_src && codegen !== nothing && uncompressed isa CodeInfo
@@ -186,8 +187,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
186187
end
187188
ccall(:jl_fill_codeinst, Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
188189
ci, rettype, exctype, nothing, const_flags, min_world, max_world, ipo_effects, nothing, di, edges)
189-
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
190-
ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, di, edges)
190+
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
191+
ci, nothing, const_flag, min_world, max_world, ipo_effects, nothing, 0.0, 0.0, 0.0, di, edges)
191192
code_cache(interp)[mi] = ci
192193
codegen = codegen_cache(interp)
193194
if codegen !== nothing
@@ -197,14 +198,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
197198
return nothing
198199
end
199200

200-
function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
201+
function finish_nocycle(::AbstractInterpreter, frame::InferenceState, time_before::UInt64)
201202
finishinfer!(frame, frame.interp, frame.cycleid)
202203
opt = frame.result.src
203204
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
204205
optimize(frame.interp, opt, frame.result)
205206
end
206207
validation_world = get_world_counter()
207-
finish!(frame.interp, frame, validation_world)
208+
finish!(frame.interp, frame, validation_world, time_before)
208209
if isdefined(frame.result, :ci)
209210
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
210211
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -219,7 +220,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
219220
return nothing
220221
end
221222

222-
function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int)
223+
function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int, time_before::UInt64)
223224
cycle_valid_worlds = WorldRange()
224225
cycle_valid_effects = EFFECTS_TOTAL
225226
for frameid = cycleid:length(frames)
@@ -236,23 +237,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
236237
caller = frames[frameid]::InferenceState
237238
adjust_cycle_frame!(caller, cycle_valid_worlds, cycle_valid_effects)
238239
finishinfer!(caller, caller.interp, cycleid)
240+
time_now = _time_ns()
241+
caller.time_self_ns += (time_now - time_before)
242+
time_before = time_now
239243
end
244+
time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
245+
time_paused = UInt64(0)
240246
for frameid = cycleid:length(frames)
241247
caller = frames[frameid]::InferenceState
242248
opt = caller.result.src
243249
if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
244250
optimize(caller.interp, opt, caller.result)
251+
time_now = _time_ns()
252+
caller.time_self_ns += (time_now - time_before)
253+
time_before = time_now
245254
end
255+
time_caches += caller.time_caches
256+
time_paused += caller.time_paused
257+
caller.time_paused = UInt64(0)
258+
caller.time_caches = 0.0
246259
end
260+
cycletop = frames[cycleid]::InferenceState
261+
time_start = cycletop.time_start
247262
validation_world = get_world_counter()
248263
cis = CodeInstance[]
249264
for frameid = cycleid:length(frames)
250265
caller = frames[frameid]::InferenceState
251-
finish!(caller.interp, caller, validation_world)
266+
caller.time_start = time_start
267+
caller.time_caches = time_caches
268+
caller.time_paused = time_paused
269+
finish!(caller.interp, caller, validation_world, time_before)
252270
if isdefined(caller.result, :ci)
253271
push!(cis, caller.result.ci)
254272
end
255273
end
274+
if cycletop.parentid != 0
275+
parent = frames[cycletop.parentid]
276+
parent.time_caches += time_caches
277+
parent.time_paused += time_paused
278+
end
256279
# After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
257280
# (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
258281
# validity.
@@ -792,9 +815,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
792815
rt = cached_return_type(codeinst)
793816
exct = codeinst.exctype
794817
effects = ipo_effects(codeinst)
795-
edge = codeinst
796818
update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst)))
797-
return Future(MethodCallResult(interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
819+
caller.time_caches += reinterpret(Float16, codeinst.time_infer_total)
820+
caller.time_caches += reinterpret(Float16, codeinst.time_infer_cache_saved)
821+
return Future(MethodCallResult(interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
798822
end
799823

800824
function MethodCallResult(::AbstractInterpreter, sv::AbsIntState, method::Method,
@@ -890,7 +914,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
890914
if frame === false
891915
# completely new, but check again after reserving in the engine
892916
if cache_mode == CACHE_MODE_GLOBAL
917+
reserve_start = _time_ns() # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
893918
ci_from_engine = engine_reserve(interp, mi)
919+
caller.time_paused += (_time_ns() - reserve_start)
894920
edge_ci = ci_from_engine
895921
codeinst = get(code_cache(interp), mi, nothing)
896922
if codeinst isa CodeInstance # return existing rettype if the code is already inferred

Compiler/src/utilities.jl

+2
Original file line numberDiff line numberDiff line change
@@ -351,3 +351,5 @@ function inbounds_option()
351351
end
352352

353353
is_asserts() = ccall(:jl_is_assertsbuild, Cint, ()) == 1
354+
355+
_time_ns() = ccall(:jl_hrtime, UInt64, ())

base/Base.jl

+3-5
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ include("views.jl")
3636

3737
# numeric operations
3838
include("hashing.jl")
39-
include("rounding.jl")
4039
include("div.jl")
41-
include("float.jl")
4240
include("twiceprecision.jl")
4341
include("complex.jl")
4442
include("rational.jl")
@@ -213,9 +211,6 @@ using .PermutedDimsArrays
213211
include("sort.jl")
214212
using .Sort
215213

216-
# BinaryPlatforms, used by Artifacts. Needs `Sort`.
217-
include("binaryplatforms.jl")
218-
219214
# Fast math
220215
include("fastmath.jl")
221216
using .FastMath
@@ -269,6 +264,9 @@ include("linking.jl")
269264
include("staticdata.jl")
270265
include("loading.jl")
271266

267+
# BinaryPlatforms, used by Artifacts. Needs `Sort`.
268+
include("binaryplatforms.jl")
269+
272270
# misc useful functions & macros
273271
include("timing.jl")
274272
include("client.jl")

base/Base_compiler.jl

+2
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ include("operators.jl")
277277
include("pointer.jl")
278278
include("refvalue.jl")
279279
include("cmem.jl")
280+
include("rounding.jl")
281+
include("float.jl")
280282

281283
include("checked.jl")
282284
using .Checked

base/array.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -1356,7 +1356,7 @@ end
13561356

13571357
append!(a::AbstractVector, iter) = _append!(a, IteratorSize(iter), iter)
13581358
push!(a::AbstractVector, iter...) = append!(a, iter)
1359-
append!(a::AbstractVector, iter...) = (for v in iter; append!(a, v); end; return a)
1359+
append!(a::AbstractVector, iter...) = (foreach(v -> append!(a, v), iter); a)
13601360

13611361
function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter)
13621362
n = Int(length(iter))::Int

0 commit comments

Comments
 (0)