Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit325f414

Browse files
committed
optimizer: alias-aware SROA
Enhances SROA of mutables using the novel Julia-level escape analysis (on top of#43800):1. alias-aware SROA, mutable ϕ-node elimination2. `isdefined` check elimination3. load-forwarding for non-eliminable but analyzable mutables---1. alias-aware SROA, mutable ϕ-node eliminationEA's alias analysis allows this new SROA to handle nested mutables allocationspretty well. Now we can eliminate the heap allocations completely fromthis insanely nested examples by the single analysis/optimization pass:```juliajulia> function refs(x) (Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref((x))))))))))))[][][][][][][][][][] endrefs (generic function with 1 method)julia> refs("julia");@allocated refs("julia")0```EA can also analyze escape of ϕ-node as well as its aliasing.Mutable ϕ-nodes would be eliminated even for a very tricky case as like:```juliajulia> code_typed((Bool,String,)) do cond, x # these allocation form multiple ϕ-nodes if cond ϕ2 = ϕ1 = Ref{Any}("foo") else ϕ2 = ϕ1 = Ref{Any}("bar") end ϕ2[] = x y = ϕ1[] # => x return y end1-element Vector{Any}: CodeInfo(1 ─ goto#3 if not cond2 ─ goto#43 ─ nothing::Nothing4 ┄ return x) => Any```Combined with the alias analysis and ϕ-node handling above,allocations in the following "realistic" examples will be optimized:```juliajulia> # demonstrate the power of our field / alias analysis with realistic end to end examples # adapted fromhttp://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B abstract type AbstractPoint{T} endjulia> struct Point{T} <: AbstractPoint{T} x::T y::T endjulia> mutable struct MPoint{T} <: AbstractPoint{T} x::T y::T endjulia> add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y);julia> function compute_point(T, n, ax, ay, bx, by) a = T(ax, ay) b = T(bx, by) for i in 0:(n-1) a = add(add(a, b), b) end a.x, a.y end;julia> function compute_point(n, a, b) for i in 0:(n-1) a = add(add(a, b), b) end a.x, a.y end;julia> function compute_point!(n, a, b) for i in 0:(n-1) a′ = add(add(a, b), b) a.x = a′.x a.y = a′.y end end;julia> compute_point(MPoint, 10, 1+.5, 2+.5, 2+.25, 4+.75);julia> compute_point(MPoint, 10, 1+.5im, 2+.5im, 2+.25im, 4+.75im);julia>@allocated compute_point(MPoint, 10000, 1+.5, 2+.5, 2+.25, 4+.75)0julia>@allocated compute_point(MPoint, 10000, 1+.5im, 2+.5im, 2+.25im, 4+.75im)0julia> compute_point(10, MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75));julia> compute_point(10, MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im));julia>@allocated compute_point(10000, MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75))0julia>@allocated compute_point(10000, MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))0julia> af, bf = MPoint(1+.5, 2+.5), MPoint(2+.25, 4+.75);julia> ac, bc = MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im);julia> compute_point!(10, af, bf);julia> compute_point!(10, ac, bc);julia>@allocated compute_point!(10000, af, bf)0julia>@allocated compute_point!(10000, ac, bc)0```2. `isdefined` check eliminationThis commit also implements a simple optimization to eliminate`isdefined` call by checking load-fowardability.This optimization may be especially useful to eliminate extra allocationinvolved with a capturing closure, e.g.:```juliajulia> callit(f, args...) = f(args...);julia> function isdefined_elim() local arr::Vector{Any} callit() do arr = Any[] end return arr end;julia> code_typed(isdefined_elim)1-element Vector{Any}: CodeInfo(1 ─ %1 = $(Expr(:foreigncall, :(:jl_alloc_array_1d), Vector{Any}, svec(Any, Int64), 0, :(:ccall), Vector{Any}, 0, 0))::Vector{Any}└── goto#3 if not true2 ─ goto#43 ─ $(Expr(:throw_undef_if_not, :arr, false))::Any4 ┄ return %1) => Vector{Any}```3. load-forwarding for non-eliminable but analyzable mutablesEA also allows us to forward loads even when the mutable allocationcan't be eliminated but still its fields are known precisely.The load forwarding might be useful since it may derive new type informationthat succeeding optimization passes can use (or just because it allowssimpler code transformations down the load):```juliajulia> code_typed((Bool,String,)) do c, s r = Ref{Any}(s) if c return r[]::String # adce_pass! will further eliminate this type assert call also else return r end end1-element Vector{Any}: CodeInfo(1 ─ %1 = %new(Base.RefValue{Any}, s)::Base.RefValue{Any}└── goto#3 if not c2 ─ return s3 ─ return %1) => Union{Base.RefValue{Any}, String}```---Please refer to the newly added test cases for more examples.Also, EA's alias analysis already succeeds to reason about arrays, andso this EA-based SROA will hopefully be generalized for array SROA as well.
1 parent25635ea commit325f414

File tree

7 files changed

+1252
-422
lines changed

7 files changed

+1252
-422
lines changed

‎base/compiler/bootstrap.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ let
1111
world=get_world_counter()
1212
interp=NativeInterpreter(world)
1313

14-
analyze_escapes_tt= Tuple{typeof(analyze_escapes), IRCode, Int, Bool,typeof(get_escape_cache(code_cache(interp)))}
14+
analyze_escapes_tt= Any[typeof(analyze_escapes), IRCode, Int, Bool,
15+
# typeof(get_escape_cache(code_cache(interp))) # once we enable IPO EA
16+
typeof(null_escape_cache)
17+
]
18+
analyze_escapes_tt= Tuple{analyze_escapes_tt...}
1519
fs= Any[
1620
# we first create caches for the optimizer, because they contain many loop constructions
1721
# and they're better to not run in interpreter even during bootstrapping

‎base/compiler/optimize.jl

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ and then caches it into a global cache for later interprocedural propagation.
9898
cache_escapes!(caller::InferenceResult, estate::EscapeState)=
9999
caller.argescapes=ArgEscapeCache(estate)
100100

101-
functionget_escape_cache(mi_cache::MICache)where MICache
101+
functionipo_escape_cache(mi_cache::MICache)where MICache
102102
returnfunction (linfo::Union{InferenceResult,MethodInstance})
103103
ifisa(linfo, InferenceResult)
104104
argescapes= linfo.argescapes
@@ -110,6 +110,7 @@ function get_escape_cache(mi_cache::MICache) where MICache
110110
return argescapes!==nothing? argescapes::ArgEscapeCache:nothing
111111
end
112112
end
113+
null_escape_cache(linfo::Union{InferenceResult,MethodInstance})=nothing
113114

114115
mutable struct OptimizationState
115116
linfo::MethodInstance
@@ -540,17 +541,24 @@ function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult
540541
#TODO: Domsorting can produce an updated domtree - no need to recompute here
541542
@timeit"compact 1" ir=compact!(ir)
542543
nargs=let def= sv.linfo.def;isa(def, Method)?Int(def.nargs):0;end
543-
get_escape_cache= (@__MODULE__).get_escape_cache(sv.inlining.mi_cache)
544-
ifis_ipo_profitable(ir, nargs)
545-
@timeit"IPO EA"begin
546-
state=analyze_escapes(ir,nargs,false,get_escape_cache)
547-
cache_escapes!(caller, state)
548-
end
549-
end
544+
# if is_ipo_profitable(ir, nargs)
545+
# @timeit "IPO EA" begin
546+
# state = analyze_escapes(ir,
547+
#nargs,#=call_resolved=#false,ipo_escape_cache(sv.inlining.mi_cache))
548+
# cache_escapes!(caller, state)
549+
# end
550+
#end
550551
@timeit"Inlining" ir=ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
551552
# @timeit "verify 2" verify_ir(ir)
552553
@timeit"compact 2" ir=compact!(ir)
553-
@timeit"SROA" ir=sroa_pass!(ir)
554+
@timeit"SROA" ir, memory_opt=linear_pass!(ir)
555+
if memory_opt
556+
@timeit"memory_opt_pass!"begin
557+
@timeit"Local EA" estate=analyze_escapes(ir,
558+
nargs,#=call_resolved=#true, null_escape_cache)
559+
@timeit"memory_opt_pass!" ir=memory_opt_pass!(ir, estate)
560+
end
561+
end
554562
@timeit"ADCE" ir=adce_pass!(ir)
555563
@timeit"type lift" ir=type_lift_pass!(ir)
556564
@timeit"compact 3" ir=compact!(ir)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp