@@ -44,10 +44,10 @@ The internal state-holding struct of the scheduler.
44
44
45
45
Fields:
46
46
- uid::UInt64 - Unique identifier for this scheduler instance
47
- - dependents::OneToMany - The result of calling `dependents` on the DAG
47
+ - dependents::Dict{Union{Thunk,Chunk},Set{Thunk}} - The result of calling `dependents` on the DAG
48
48
- finished::Set{Thunk} - The set of completed `Thunk`s
49
49
- waiting::OneToMany - Map from downstream `Thunk` to upstream `Thunk`s that still need to execute
50
- - waiting_data::OneToMany - Map from upstream `Thunk` to all downstream `Thunk`s, accumulating over time
50
+ - waiting_data::Dict{Union{Thunk,Chunk},Set{Thunk}} - Map from input `Chunk`/ upstream `Thunk` to all unfinished downstream `Thunk`s, to retain caches
51
51
- ready::Vector{Thunk} - The list of `Thunk`s that are ready to execute
52
52
- cache::Dict{Thunk, Any} - Maps from a finished `Thunk` to it's cached result, often a DRef
53
53
- running::Set{Thunk} - The set of currently-running `Thunk`s
@@ -67,10 +67,10 @@ Fields:
67
67
"""
68
68
struct ComputeState
69
69
uid:: UInt64
70
- dependents:: OneToMany
70
+ dependents:: Dict{Union{Thunk,Chunk},Set{Thunk}}
71
71
finished:: Set{Thunk}
72
72
waiting:: OneToMany
73
- waiting_data:: OneToMany
73
+ waiting_data:: Dict{Union{Thunk,Chunk},Set{Thunk}}
74
74
ready:: Vector{Thunk}
75
75
cache:: Dict{Thunk, Any}
76
76
running:: Set{Thunk}
@@ -94,7 +94,7 @@ function start_state(deps::Dict, node_order, chan)
94
94
deps,
95
95
Set {Thunk} (),
96
96
OneToMany (),
97
- OneToMany (),
97
+ Dict {Union{Thunk,Chunk},Set{Thunk}} (),
98
98
Vector {Thunk} (undef, 0 ),
99
99
Dict {Thunk, Any} (),
100
100
Set {Thunk} (),
@@ -300,7 +300,7 @@ function compute_dag(ctx, d::Thunk; options=SchedulerOptions())
300
300
state = start_state (deps, node_order, chan)
301
301
302
302
# setup thunk_dict mappings
303
- for node in keys (deps)
303
+ for node in filter (istask, keys (deps) )
304
304
state. thunk_dict[node. id] = node
305
305
for dep in deps[node]
306
306
state. thunk_dict[dep. id] = dep
@@ -395,7 +395,7 @@ function compute_dag(ctx, d::Thunk; options=SchedulerOptions())
395
395
# FIXME : Move log start and lock to before error check
396
396
@dbg timespan_start (ctx, :finish , thunk_id, master)
397
397
lock (state. lock) do
398
- finish_task! (state, node, thunk_failed)
398
+ finish_task! (ctx, state, node, thunk_failed)
399
399
end
400
400
@dbg timespan_end (ctx, :finish , thunk_id, master)
401
401
@@ -651,7 +651,7 @@ function pop_with_affinity!(ctx, tasks, proc)
651
651
return nothing
652
652
end
653
653
654
- function finish_task! (state, node, thunk_failed; free= true )
654
+ function finish_task! (ctx, state, node, thunk_failed; free= true )
655
655
pop! (state. running, node)
656
656
if ! thunk_failed
657
657
push! (state. finished, node)
@@ -683,22 +683,41 @@ function finish_task!(state, node, thunk_failed; free=true)
683
683
delete! (state. futures, node)
684
684
end
685
685
end
686
- # Internal clean-up
687
- for inp in filter (istask, inputs (node))
686
+
687
+ # Chunk clean-up
688
+ to_evict = Set {Chunk} ()
689
+ for inp in filter (t-> istask (t) || (t isa Chunk), inputs (node))
688
690
if inp in keys (state. waiting_data)
689
691
s = state. waiting_data[inp]
690
692
if node in s
691
693
pop! (s, node)
692
694
end
693
695
if free && isempty (s)
694
- if haskey (state. cache, inp)
696
+ if istask (inp) && haskey (state. cache, inp)
695
697
_node = state. cache[inp]
698
+ if _node isa Chunk
699
+ push! (to_evict, _node)
700
+ end
696
701
free! (_node, force= false , cache= (istask (inp) && inp. cache))
697
702
pop! (state. cache, inp)
703
+ elseif inp isa Chunk
704
+ push! (to_evict, inp)
698
705
end
699
706
end
700
707
end
701
708
end
709
+ if ! isempty (to_evict)
710
+ @sync for w in map (p-> p. pid, procs_to_use (ctx))
711
+ @async remote_do (evict_chunks!, w, to_evict)
712
+ end
713
+ end
714
+ end
715
+
716
+ function evict_chunks! (chunks:: Set{Chunk} )
717
+ for chunk in chunks
718
+ haskey (CHUNK_CACHE, chunk) && delete! (CHUNK_CACHE, chunk)
719
+ end
720
+ nothing
702
721
end
703
722
704
723
fire_task! (ctx, thunk:: Thunk , p, state; util= 10 ^ 9 ) =
@@ -717,7 +736,7 @@ function fire_tasks!(ctx, thunks::Vector{<:Tuple}, (gproc, proc), state)
717
736
# cache hit
718
737
state. cache[thunk] = data
719
738
thunk_failed = thunk in state. errored
720
- finish_task! (state, thunk, thunk_failed; free= false )
739
+ finish_task! (ctx, state, thunk, thunk_failed; free= false )
721
740
continue
722
741
else
723
742
# cache miss
@@ -728,7 +747,7 @@ function fire_tasks!(ctx, thunks::Vector{<:Tuple}, (gproc, proc), state)
728
747
try
729
748
result = thunk. options. restore (thunk)
730
749
state. cache[thunk] = result
731
- finish_task! (state, thunk, false ; free= false )
750
+ finish_task! (ctx, state, thunk, false ; free= false )
732
751
continue
733
752
catch err
734
753
@error " Thunk restore failed" exception= (err,catch_backtrace ())
0 commit comments