@@ -1,3 +1,5 @@
+# ccall(:jl_exit_on_sigint, Cvoid, (Cint,), 0)
+
 using Distributed
 if haskey(ENV, "BENCHMARK_PROCS")
     const np, nt = parse.(Ref(Int), split(ENV["BENCHMARK_PROCS"], ":"))
@@ -40,6 +42,10 @@ elseif render == "offline"
     using FFMPEG, FileIO, ImageMagick
 end
 const RENDERS = Dict{Int,Dict}()
+const live_port = parse(Int, get(ENV, "BENCHMARK_LIVE_PORT", "8000"))
+
+const graph = parse(Bool, get(ENV, "BENCHMARK_GRAPH", "0"))
+const profile = parse(Bool, get(ENV, "BENCHMARK_PROFILE", "0"))
 
 _benches = get(ENV, "BENCHMARK", "cpu,cpu+dagger")
 const benches = []
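Note: the script is configured entirely through environment variables (`BENCHMARK_PROCS`, `BENCHMARK`, and the constants added above). A minimal usage sketch, assuming the script is saved as `benchmark.jl` and run in a fresh session; reading `np:nt` as `workers:threads` is inferred from the variable names:

```julia
# Hypothetical driver session for the benchmark script.
ENV["BENCHMARK_PROCS"] = "8:4"         # np = 8 workers, nt = 4 threads (np:nt)
ENV["BENCHMARK"] = "cpu,cpu+dagger"    # suites to build (this is the default)
ENV["BENCHMARK_LIVE_PORT"] = "8000"    # port for the live renderer (default 8000)
ENV["BENCHMARK_GRAPH"] = "1"           # also write a .dot log of the task graph
ENV["BENCHMARK_PROFILE"] = "0"         # scheduler profiling off
include("benchmark.jl")
```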
@@ -124,7 +130,7 @@
 
 theory_flops(nrow, ncol, nfeatures) = 11 * ncol * nrow * nfeatures + 2 * (ncol + nrow) * nfeatures
 
-function nmf_suite(; dagger, accel, network, kwargs...)
+function nmf_suite(ctx; dagger, accel, network)
     suite = BenchmarkGroup()
 
     #= TODO: Re-enable
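`theory_flops` above gives the theoretical FLOP count of the NNMF kernel as a function of the input shape. A quick sanity check of the formula, using arbitrary example sizes:

```julia
theory_flops(nrow, ncol, nfeatures) = 11 * ncol * nrow * nfeatures + 2 * (ncol + nrow) * nfeatures

# For a 10_000 × 10_000 input with 10 features:
#   11 * 10^4 * 10^4 * 10 = 1.1e10 and 2 * (10^4 + 10^4) * 10 = 4e5,
# so the first (elementwise) term dominates.
theory_flops(10_000, 10_000, 10)  # 11_000_400_000
```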
@@ -194,59 +200,67 @@ function nmf_suite(; dagger, accel, network, kwargs...)
             else
                 error("Unknown network $network")
             end
+            rr = true
             opts = if accel == "cuda"
                 Dagger.Sch.SchedulerOptions(;proctypes=[
                     DaggerGPU.CuArrayDeviceProc
-                ], network=net)
+                ], network=net, round_robin=rr)
             elseif accel == "amdgpu"
                 Dagger.Sch.SchedulerOptions(;proctypes=[
                     DaggerGPU.ROCArrayProc
-                ], network=net)
+                ], network=net, round_robin=rr)
             elseif accel == "cpu"
-                Dagger.Sch.SchedulerOptions(;network=net)
+                Dagger.Sch.SchedulerOptions(;network=net, round_robin=rr)
             else
                 error("Unknown accelerator $accel")
             end
-            ctx = Context(collect((1:nw) .+ 1); kwargs...)
             p = sum([length(Dagger.get_processors(OSProc(id))) for id in 2:(nw+1)])
+            # bsz = ncol ÷ length(workers())
+            bsz = ncol ÷ 64
             nsuite["Workers: $nw"] = @benchmarkable begin
-                compute($ctx, nnmf($X[], $W[], $H[]); options=$opts)
+                _ctx = Context($ctx, workers()[1:$nw])
+                compute(_ctx, nnmf($X[], $W[], $H[]); options=$opts)
             end setup=begin
                 _nw, _scale = $nw, $scale
                 @info "Starting $_nw worker Dagger NNMF (scale by $_scale)"
-                if render != ""
-                    Dagger.show_gantt($ctx; width=1800, window_length=20, delay=2, port=4040, live=live)
-                end
                 if $accel == "cuda"
                     # FIXME: Allocate with CUDA.rand if possible
-                    $X[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $ncol); options=$opts))
-                    $W[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $nfeatures); options=$opts))
-                    $H[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nfeatures, $ncol); options=$opts))
+                    $X[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $bsz), Float32, $nrow, $ncol); options=$opts))
+                    $W[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $bsz), Float32, $nrow, $nfeatures); options=$opts))
+                    $H[] = Dagger.mapchunks(CUDA.cu, compute(rand(Blocks($nrow, $bsz), Float32, $nfeatures, $ncol); options=$opts))
                 elseif $accel == "amdgpu"
                     $X[] = Dagger.mapchunks(ROCArray, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $ncol); options=$opts))
                     $W[] = Dagger.mapchunks(ROCArray, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $nfeatures); options=$opts))
                    $H[] = Dagger.mapchunks(ROCArray, compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nfeatures, $ncol); options=$opts))
                 elseif $accel == "cpu"
-                    $X[] = compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $ncol); options=$opts)
-                    $W[] = compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nrow, $nfeatures); options=$opts)
-                    $H[] = compute(rand(Blocks($nrow, $ncol÷$p), Float32, $nfeatures, $ncol); options=$opts)
+                    $X[] = compute(rand(Blocks($nrow, $bsz), Float32, $nrow, $ncol); options=$opts)
+                    $W[] = compute(rand(Blocks($nrow, $bsz), Float32, $nrow, $nfeatures); options=$opts)
+                    $H[] = compute(rand(Blocks($nrow, $bsz), Float32, $nfeatures, $ncol); options=$opts)
                 end
             end teardown=begin
-                if render != ""
+                if render != "" && !live
                     Dagger.continue_rendering[] = false
-                    video_paths = take!(Dagger.render_results)
-                    try
-                        video_data = Dict(key=>read(video_paths[key]) for key in keys(video_paths))
-                        push!(get!(()->[], RENDERS[$scale], $nw), video_data)
-                    catch
+                    for i in 1:5
+                        isready(Dagger.render_results) && break
+                        sleep(1)
+                    end
+                    if isready(Dagger.render_results)
+                        video_paths = take!(Dagger.render_results)
+                        try
+                            video_data = Dict(key=>read(video_paths[key]) for key in keys(video_paths))
+                            push!(get!(()->[], RENDERS[$scale], $nw), video_data)
+                        catch err
+                            @error "Failed to process render results" exception=(err,catch_backtrace())
+                        end
+                    else
+                        @warn "Failed to fetch render results"
                     end
                 end
                 $X[] = nothing
                 $W[] = nothing
                 $H[] = nothing
                 @everywhere GC.gc()
             end
-            break
             nw ÷= 2
         end
         suite["NNMF scaled by: $scale"] = nsuite
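The `setup=`/`teardown=` clauses above are BenchmarkTools.jl hooks that run once per sample, outside the timed region; the `Ref` cells (`$X[]`, `$W[]`, `$H[]`) are how freshly allocated inputs are threaded into each sample. A self-contained sketch of the same pattern (names are illustrative):

```julia
using BenchmarkTools

const A = Ref{Any}(nothing)  # stands in for the X/W/H Refs above

# setup allocates before each sample and teardown releases after it,
# so allocation cost stays out of the measurement.
b = @benchmarkable sum($A[]) setup=($A[] = rand(Float32, 1000, 1000)) teardown=($A[] = nothing)
run(b; samples=3, gcsample=true)
```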
@@ -261,28 +275,42 @@ function main()
     output_prefix = "result-$(np)workers-$(nt)threads-$(Dates.now())"
 
     suites = Dict()
+    graph_opts = if graph && render != ""
+        (log_sink=Dagger.LocalEventLog(), log_file=output_prefix*".dot")
+    elseif render != ""
+        (log_sink=Dagger.LocalEventLog(),)
+    else
+        NamedTuple()
+    end
+    ctx = Context(collect((1:nw) .+ 1); profile=profile, graph_opts...)
     for bench in benches
         name = bench.name
         println("creating $name benchmarks")
-        suites[name] = if bench.dagger
-            nmf_suite(; dagger=true, accel=bench.accel, network=bench.network, log_sink=Dagger.LocalEventLog(), log_file=output_prefix*".dot", profile=false)
-        else
-            nmf_suite(; dagger=false, accel=bench.accel, network=bench.network)
+        suites[name] = nmf_suite(ctx; dagger=bench.dagger, accel=bench.accel, network=bench.network)
+    end
+    if render != ""
+        Dagger.show_gantt(ctx; width=1800, window_length=5, delay=2, port=live_port, live=live)
+        if live
+            # Make sure server code is compiled
+            sleep(1)
+            run(pipeline(`curl -s localhost:$live_port/`; stdout=devnull))
+            run(pipeline(`curl -s localhost:$live_port/profile`; stdout=devnull))
+            @info "Rendering started on port $live_port"
         end
     end
     res = Dict()
     for bench in benches
         name = bench.name
         println("running $name benchmarks")
         res[name] = try
-            run(suites[name]; samples=5, seconds=10*60, gcsample=true)
+            run(suites[name]; samples=3, seconds=10*60, gcsample=true)
         catch err
             @error "Error running $name benchmarks" exception=(err,catch_backtrace())
             nothing
         end
     end
     for bench in benches
-        println("benchmark results for $(bench.name): $(res[bench.name])")
+        println("benchmark results for $(bench.name): $(minimum(res[bench.name]))")
     end
 
     println("saving results in $output_prefix.$output_format")
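The `graph_opts` block in this hunk relies on NamedTuple splatting: the logging options are chosen once and expanded into `Context`'s keyword arguments, with the empty `NamedTuple()` expanding to no keywords at all. A toy illustration of the idiom (`f` is a hypothetical stand-in for the `Context` constructor):

```julia
# Hypothetical function with Context-like keyword arguments.
f(procs; profile=false, log_sink=nothing, log_file=nothing) = (profile, log_sink, log_file)

render = "webm"  # pretend rendering is enabled
graph_opts = render != "" ? (log_sink=:event_log, log_file="out.dot") : NamedTuple()
f([2, 3]; profile=false, graph_opts...)  # -> (false, :event_log, "out.dot")
```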
@@ -294,6 +322,11 @@ function main()
             serialize(io, outdict)
         end
     end
+
+    if parse(Bool, get(ENV, "BENCHMARK_VISUALIZE", "0"))
+        run(`$(Base.julia_cmd()) $(joinpath(pwd(), "visualize.jl")) -- $(output_prefix*"."*output_format)`)
+    end
+
     println("Done.")
 
     # TODO: Compare with multiple results