
Commit 325f414

optimizer: alias-aware SROA
Enhances SROA of mutables using the novel Julia-level escape analysis (on top of #43800):
1. alias-aware SROA, mutable ϕ-node elimination
2. `isdefined` check elimination
3. load-forwarding for non-eliminable but analyzable mutables

---

1. alias-aware SROA, mutable ϕ-node elimination

EA's alias analysis allows this new SROA to handle nested mutable allocations pretty well. Now we can eliminate the heap allocations completely from this insanely nested example with a single analysis/optimization pass:
```julia
julia> function refs(x)
           (Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref((x))))))))))))[][][][][][][][][][]
       end
refs (generic function with 1 method)

julia> refs("julia"); @allocated refs("julia")
0
```

EA can also analyze the escape of a ϕ-node as well as its aliasing, so mutable ϕ-nodes are eliminated even in a very tricky case like:
```julia
julia> code_typed((Bool,String,)) do cond, x
           # these allocations form multiple ϕ-nodes
           if cond
               ϕ2 = ϕ1 = Ref{Any}("foo")
           else
               ϕ2 = ϕ1 = Ref{Any}("bar")
           end
           ϕ2[] = x
           y = ϕ1[] # => x
           return y
       end
1-element Vector{Any}:
 CodeInfo(
1 ─     goto #3 if not cond
2 ─     goto #4
3 ─     nothing::Nothing
4 ┄     return x
) => Any
```

Combined with the alias analysis and ϕ-node handling above, allocations in the following "realistic" examples will be optimized:
```julia
julia> # demonstrate the power of our field / alias analysis with realistic end to end examples
       # adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B
       abstract type AbstractPoint{T} end

julia> struct Point{T} <: AbstractPoint{T}
           x::T
           y::T
       end

julia> mutable struct MPoint{T} <: AbstractPoint{T}
           x::T
           y::T
       end

julia> add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y);

julia> function compute_point(T, n, ax, ay, bx, by)
           a = T(ax, ay)
           b = T(bx, by)
           for i in 0:(n-1)
               a = add(add(a, b), b)
           end
           a.x, a.y
       end;

julia> function compute_point(n, a, b)
           for i in 0:(n-1)
               a = add(add(a, b), b)
           end
           a.x, a.y
       end;

julia> function compute_point!(n, a, b)
           for i in 0:(n-1)
               a′ = add(add(a, b), b)
               a.x = a′.x
               a.y = a′.y
           end
       end;

julia> compute_point(MPoint, 10, 1+.5, 2+.5, 2+.25, 4+.75);

julia> compute_point(MPoint, 10, 1+.5im, 2+.5im, 2+.25im, 4+.75im);

julia> @allocated compute_point(MPoint, 10000, 1+.5, 2+.5, 2+.25, 4+.75)
0

julia> @allocated compute_point(MPoint, 10000, 1+.5im, 2+.5im, 2+.25im, 4+.75im)
0

julia> compute_point(10, MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75));

julia> compute_point(10, MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im));

julia> @allocated compute_point(10000, MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75))
0

julia> @allocated compute_point(10000, MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
0

julia> af, bf = MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75);

julia> ac, bc = MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im);

julia> compute_point!(10, af, bf);

julia> compute_point!(10, ac, bc);

julia> @allocated compute_point!(10000, af, bf)
0

julia> @allocated compute_point!(10000, ac, bc)
0
```

2. `isdefined` check elimination

This commit also implements a simple optimization that eliminates `isdefined` calls by checking load-forwardability. This optimization may be especially useful for eliminating the extra allocation involved with a capturing closure, e.g.:
```julia
julia> callit(f, args...) = f(args...);

julia> function isdefined_elim()
           local arr::Vector{Any}
           callit() do
               arr = Any[]
           end
           return arr
       end;

julia> code_typed(isdefined_elim)
1-element Vector{Any}:
 CodeInfo(
1 ─ %1 = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Vector{Any}, svec(Any, Int64), 0, :(:ccall), Vector{Any}, 0, 0))::Vector{Any}
└──      goto #3 if not true
2 ─      goto #4
3 ─      $(Expr(:throw_undef_if_not, :arr, false))::Any
4 ┄      return %1
) => Vector{Any}
```

3. load-forwarding for non-eliminable but analyzable mutables

EA also allows us to forward loads even when the mutable allocation can't be eliminated but its fields are still known precisely. This load forwarding can be useful since it may derive new type information that succeeding optimization passes can use (or simply because it enables simpler code transformations down the road):
```julia
julia> code_typed((Bool,String,)) do c, s
           r = Ref{Any}(s)
           if c
               return r[]::String # adce_pass! will further eliminate this type assert call also
           else
               return r
           end
       end
1-element Vector{Any}:
 CodeInfo(
1 ─ %1 = %new(Base.RefValue{Any}, s)::Base.RefValue{Any}
└──      goto #3 if not c
2 ─      return s
3 ─      return %1
) => Union{Base.RefValue{Any}, String}
```

---

Please refer to the newly added test cases for more examples. Also, EA's alias analysis already succeeds in reasoning about arrays, so this EA-based SROA will hopefully be generalized to array SROA as well.
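For a concrete sense of the kind of pattern such an array generalization might target, here is a hypothetical sketch (illustrative only; `array_sroa_candidate` is a made-up name, and this commit does not eliminate the allocation in it):

```julia
# Hypothetical illustration: the temporary Vector never escapes, so a future
# array SROA built on EA's alias analysis could in principle elide it.
# (Not optimized by this commit; shown only to motivate the generalization.)
function array_sroa_candidate(x::Float64, y::Float64)
    tmp = Vector{Float64}(undef, 2)
    tmp[1] = x
    tmp[2] = y
    return tmp[1] + tmp[2]
end
```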
1 parent 25635ea commit 325f414


7 files changed: +1252 -422 lines changed


base/compiler/bootstrap.jl (+5 -1)

```diff
@@ -11,7 +11,11 @@ let
     world = get_world_counter()
     interp = NativeInterpreter(world)
 
-    analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(get_escape_cache(code_cache(interp)))}
+    analyze_escapes_tt = Any[typeof(analyze_escapes), IRCode, Int, Bool,
+                             # typeof(get_escape_cache(code_cache(interp))) # once we enable IPO EA
+                             typeof(null_escape_cache)
+                             ]
+    analyze_escapes_tt = Tuple{analyze_escapes_tt...}
     fs = Any[
         # we first create caches for the optimizer, because they contain many loop constructions
         # and they're better to not run in interpreter even during bootstrapping
```
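The rewritten bootstrap code collects the element types of `analyze_escapes_tt` in a `Vector{Any}` and then splats them into a `Tuple` type, which makes it easy to swap in the IPO escape-cache argument type once IPO EA is enabled. A minimal sketch of that splatting pattern, using placeholder element types rather than the compiler's own:

```julia
# Sketch of the Tuple-type splatting used above (placeholder element types).
elts = Any[typeof(sin), Int, Bool,
           # an alternative element type could be toggled in here, as in bootstrap.jl
           typeof(identity)]
tt = Tuple{elts...}  # splat the vector into the type parameters
@assert tt === Tuple{typeof(sin), Int, Bool, typeof(identity)}
```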

base/compiler/optimize.jl (+17 -9)

```diff
@@ -98,7 +98,7 @@ and then caches it into a global cache for later interprocedural propagation.
 cache_escapes!(caller::InferenceResult, estate::EscapeState) =
     caller.argescapes = ArgEscapeCache(estate)
 
-function get_escape_cache(mi_cache::MICache) where MICache
+function ipo_escape_cache(mi_cache::MICache) where MICache
     return function (linfo::Union{InferenceResult,MethodInstance})
         if isa(linfo, InferenceResult)
             argescapes = linfo.argescapes
@@ -110,6 +110,7 @@ function get_escape_cache(mi_cache::MICache) where MICache
         return argescapes !== nothing ? argescapes::ArgEscapeCache : nothing
     end
 end
+null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing
 
 mutable struct OptimizationState
     linfo::MethodInstance
@@ -540,17 +541,24 @@ function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult
     # TODO: Domsorting can produce an updated domtree - no need to recompute here
     @timeit "compact 1" ir = compact!(ir)
     nargs = let def = sv.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
-    get_escape_cache = (@__MODULE__).get_escape_cache(sv.inlining.mi_cache)
-    if is_ipo_profitable(ir, nargs)
-        @timeit "IPO EA" begin
-            state = analyze_escapes(ir, nargs, false, get_escape_cache)
-            cache_escapes!(caller, state)
-        end
-    end
+    # if is_ipo_profitable(ir, nargs)
+    #     @timeit "IPO EA" begin
+    #         state = analyze_escapes(ir,
+    #             nargs, #=call_resolved=#false, ipo_escape_cache(sv.inlining.mi_cache))
+    #         cache_escapes!(caller, state)
+    #     end
+    # end
     @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
     # @timeit "verify 2" verify_ir(ir)
     @timeit "compact 2" ir = compact!(ir)
-    @timeit "SROA" ir = sroa_pass!(ir)
+    @timeit "SROA" ir, memory_opt = linear_pass!(ir)
+    if memory_opt
+        @timeit "memory_opt_pass!" begin
+            @timeit "Local EA" estate = analyze_escapes(ir,
+                nargs, #=call_resolved=#true, null_escape_cache)
+            @timeit "memory_opt_pass!" ir = memory_opt_pass!(ir, estate)
+        end
+    end
     @timeit "ADCE" ir = adce_pass!(ir)
     @timeit "type lift" ir = type_lift_pass!(ir)
     @timeit "compact 3" ir = compact!(ir)
```
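In this pipeline, `sroa_pass!` is replaced by `linear_pass!`, and the EA-driven `memory_opt_pass!` runs only when `linear_pass!` signals, via the returned `memory_opt` flag, that there is something left for it to do. A quick way to sanity-check the end result from a REPL (a sketch only; `sum_refs` is a made-up example, and the exact output depends on the Julia build):

```julia
# Hypothetical sanity check that non-escaping Refs do not allocate after SROA.
function sum_refs(x, y)
    a = Ref(x)   # both Refs stay local, so SROA should remove them
    b = Ref(y)
    return a[] + b[]
end

sum_refs(1, 2)                      # compile first
@show @allocated sum_refs(1, 2)     # expected to print 0 on an optimized build
code_typed(sum_refs, (Int, Int))    # the typed IR should contain no %new of a RefValue
```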
