@@ -267,33 +267,34 @@ function multitest_opt(problemDescriptions, method; NumRepetitions = 3)
267
267
268
268
start_time = time ()
269
269
ftn, dur = fitness_for_opt (prob, numdims, popsize, ceil (Int, numfevals), method)
270
- df[:ElapsedTime ] = dur
271
- df[:StartTime ] = Libc. strftime (" %Y-%m-%d %H:%M:%S" , start_time)
272
- df[:Fitness ] = ftn
270
+ df[! , :ElapsedTime ] = [ dur]
271
+ df[! , :StartTime ] = [ Libc. strftime (" %Y-%m-%d %H:%M:%S" , start_time)]
272
+ df[! , :Fitness ] = [ ftn]
273
273
274
274
push! (dfs, df)
275
275
end
276
276
end
277
277
278
- vcat (dfs)
278
+ vcat (dfs... )
279
279
end
280
280
281
281
using CSV
282
282
283
283
"""
    read_benchmark_db(filename)

Load the benchmark results stored at `filename`.

When `filename` is an existing file it is parsed with `CSV.read` into a
`DataFrame`. Otherwise a warning is logged and an empty `DataFrame()` is
returned.
"""
function read_benchmark_db(filename)
    # Guard clause: a missing results file yields an empty frame, not an error.
    if !isfile(filename)
        @warn " Benchmark results file $filename not found, returning empty frame"
        return DataFrame()
    end
    return CSV.read(filename, DataFrame)
end
291
291
292
292
"""
    add_rank_per_group(df, groupcols, rankcol, resultcol)

Group `df` by `groupcols` and, within each group, reorder the rows by the
sort permutation of column `rankcol`, then store the row positions
`1, 2, …, n` of that ordering in column `resultcol`.

Returns the result of `combine` over the per-group frames.
"""
function add_rank_per_group(df, groupcols, rankcol, resultcol)
    return combine(groupby(df, groupcols)) do group
        # Permutation that orders this group's rows by the ranking column.
        perm = sortperm(group[:, rankcol])
        ranked = group[perm, :]
        # Rank is the 1-based position after ordering.
        ranked[!, resultcol] = collect(1:size(ranked, 1))
        return ranked
    end
end
299
300
@@ -303,45 +304,45 @@ function list_benchmark_db(db, saveResultCsvFile = nothing)
303
304
304
305
if numrows > 1
305
306
# Find min fitness per problem
306
- minfitpp = by (db, [:Problem , :NumDims ]) do df
307
- DataFrame (
308
- MinFitness = minimum (df[:Fitness ])
309
- )
307
+ minfitpp = combine (groupby (db, [:Problem , :NumDims ])) do df
308
+ DataFrame (MinFitness = minimum (df. Fitness))
310
309
end
311
310
312
311
# Add col with order of magnitude worse than min fitness for each run
313
- db = join (db, minfitpp, on = [:Problem , :NumDims ])
314
- db[:LogTimesWorseFitness ] = log10 .(db[ : Fitness] ./ db[ : MinFitness] )
312
+ db = leftjoin (db, minfitpp, on = [:Problem , :NumDims ])
313
+ db[:, : LogTimesWorseFitness ] = log10 .(db. Fitness ./ db. MinFitness)
315
314
316
315
# Calc median fitness and time per problem and method.
317
- sumdf = by ( db, [:Problem , :NumDims , :Method ]) do df
316
+ sumdf = combine ( groupby ( db, [:Problem , :NumDims , :Method ]) ) do df
318
317
DataFrame (N = size (df, 1 ),
319
- MedianFitness = median (df[:, : Fitness] ),
320
- MedianTime = median (df[:, : ElapsedTime] ))
318
+ MedianFitness = median (df. Fitness),
319
+ MedianTime = median (df. ElapsedTime))
321
320
end
322
321
323
322
# Rank on median fitness and median time for each problem.
324
323
sumdf = add_rank_per_group (sumdf, [:Problem , :NumDims ], :MedianFitness , :RankFitness )
325
324
sumdf = add_rank_per_group (sumdf, [:Problem , :NumDims ], :MedianTime , :RankTime )
326
325
327
326
# Get number of runs and median magnitude worse per method
328
- permethod = by ( db, [:Method ]) do df
327
+ permethod = combine ( groupby ( db, [:Method ]) ) do df
329
328
DataFrame (
330
329
NumRuns = size (df, 1 ),
331
- MedianLogTimesWorseFitness = round (median (df[:, : LogTimesWorseFitness] ), digits= 1 )
330
+ MedianLogTimesWorseFitness = round (median (df. LogTimesWorseFitness), digits= 1 )
332
331
)
333
332
end
334
333
335
334
# and merge with table with mean ranks of fitness and time.
336
- summarydf = by (sumdf, [:Method ]) do df
335
+ summarydf = combine (groupby (sumdf, [:Method ])) do df
336
+ rfs = df. RankFitness
337
+ rts = df. RankTime
337
338
DataFrame (
338
- MeanRank = round (mean (df[ :RankFitness ] ), digits= 3 ),
339
- Num1sFitness = sum (r -> (r == 1 ) ? 1 : 0 , df[ :RankFitness ] ),
340
- MeanRankTime = round (mean (df[ :RankTime ] ), digits= 3 ),
341
- Num1sTime = sum (r -> (r == 1 ) ? 1 : 0 , df[ :RankTime ] ),
339
+ MeanRank = round (mean (rfs ), digits= 3 ),
340
+ Num1sFitness = sum (r -> (r == 1 ) ? 1 : 0 , rfs ),
341
+ MeanRankTime = round (mean (rts ), digits= 3 ),
342
+ Num1sTime = sum (r -> (r == 1 ) ? 1 : 0 , rts ),
342
343
)
343
344
end
344
- df = join (summarydf, permethod, on = :Method )
345
+ df = leftjoin (summarydf, permethod, on = :Method )
345
346
346
347
# Now sort and print
347
348
sort! (df, [:MeanRank , :MeanRankTime , :Num1sFitness ])
@@ -381,14 +382,14 @@ function compare_optimizers_to_benchmarks(benchmarkfile, pset, optimizers, nreps
381
382
totalruns = length (optimizers) * nreps * length (pset)
382
383
runnum = 0
383
384
for optmethod in optimizers
384
- optsel = db[ : Method] .== string (optmethod)
385
+ optsel = db. Method .== string (optmethod)
385
386
for pd in pset
386
387
probname, numdims, popsize, numfevals = pd
387
- psel = db[ : Problem] .== probname
388
- dsel = db[ : NumDims] .== numdims
388
+ psel = db. Problem .== probname
389
+ dsel = db. NumDims .== numdims
389
390
df = db[optsel .& psel .& dsel, :]
390
- benchfitnesses = convert (Vector{Float64}, df[ : Fitness] )
391
- benchtimes = convert (Vector{Float64}, df[ : ElapsedTime] )
391
+ benchfitnesses = convert (Vector{Float64}, df. Fitness)
392
+ benchtimes = convert (Vector{Float64}, df. ElapsedTime)
392
393
newfs = Float64[]
393
394
newtimes = Float64[]
394
395
prob = BlackBoxOptim. example_problems[probname]
@@ -437,19 +438,19 @@ function compare_optimizers_to_benchmarks(benchmarkfile, pset, optimizers, nreps
437
438
438
439
# Use Benjamini-Hochberg to judge which pvalues are significant given we did
439
440
# many comparisons.
440
- ftn_pvs = convert (Vector{Float64}, df[ : FitnessPvalue] )
441
- df[:FitnessSignificantBH001 ] = benjamini_hochberg (ftn_pvs, 0.01 )
442
- df[:FitnessSignificantBH005 ] = benjamini_hochberg (ftn_pvs, 0.05 )
443
- df[:FitnessSignificantBH010 ] = benjamini_hochberg (ftn_pvs, 0.10 )
441
+ ftn_pvs = convert (Vector{Float64}, df. FitnessPvalue)
442
+ df[! , :FitnessSignificantBH001 ] = benjamini_hochberg (ftn_pvs, 0.01 )
443
+ df[! , :FitnessSignificantBH005 ] = benjamini_hochberg (ftn_pvs, 0.05 )
444
+ df[! , :FitnessSignificantBH010 ] = benjamini_hochberg (ftn_pvs, 0.10 )
444
445
445
- time_pvs = convert (Vector{Float64}, df[ : TimePvalue] )
446
- df[:TimeSignificantBH001 ] = benjamini_hochberg (time_pvs, 0.01 )
447
- df[:TimeSignificantBH005 ] = benjamini_hochberg (time_pvs, 0.05 )
448
- df[:TimeSignificantBH010 ] = benjamini_hochberg (time_pvs, 0.10 )
446
+ time_pvs = convert (Vector{Float64}, df. TimePvalue)
447
+ df[! , :TimeSignificantBH001 ] = benjamini_hochberg (time_pvs, 0.01 )
448
+ df[! , :TimeSignificantBH005 ] = benjamini_hochberg (time_pvs, 0.05 )
449
+ df[! , :TimeSignificantBH010 ] = benjamini_hochberg (time_pvs, 0.10 )
449
450
450
451
CSV. write (Libc. strftime (" comparison_%Y%m%d_%H%M%S.csv" , time ()), df)
451
452
else
452
- df = CSV. read (comparisonfile)
453
+ df = CSV. read (comparisonfile, DataFrame )
453
454
end
454
455
sort! (df, [:FitnessPvalue ])
455
456
report_below_pvalue (df, col_prefix= " Fitness" , pvalue= 1.00 )
@@ -461,14 +462,14 @@ function compare_optimizers_to_benchmarks(benchmarkfile, pset, optimizers, nreps
461
462
462
463
# Report (in color) on number of significant differences after Benjamini-Hochberg
463
464
# correction.
464
- n_ftn_reg = sum (df[ : FitnessSignificantBH005] .& (df[ : FitnessOrder] .== " <" ))
465
- n_ftn_imp = sum (df[ : FitnessSignificantBH005] .& (df[ : FitnessOrder] .== " >" ))
465
+ n_ftn_reg = sum (df. FitnessSignificantBH005 .& (df. FitnessOrder .== " <" ))
466
+ n_ftn_imp = sum (df. FitnessSignificantBH005 .& (df. FitnessOrder .== " >" ))
466
467
printstyled (" \n $n_ftn_reg significant fitness regressions at Benjamini-Hochberg 0.05 level\n " ,
467
468
color= n_ftn_reg > 0 ? :red : :green )
468
469
printstyled (" \n $n_ftn_imp significant fitness improvments at Benjamini-Hochberg 0.05 level\n " ,
469
470
color= n_ftn_imp > 0 ? :green : :white )
470
- n_time_reg = sum (df[ : TimeSignificantBH005] .& (df[ : TimeOrder] .== " <" ))
471
- n_time_imp = sum (df[ : TimeSignificantBH005] .& (df[ : TimeOrder] .== " >" ))
471
+ n_time_reg = sum (df. TimeSignificantBH005 .& (df. TimeOrder .== " <" ))
472
+ n_time_imp = sum (df. TimeSignificantBH005 .& (df. TimeOrder .== " >" ))
472
473
printstyled (" \n $n_time_reg significant time regressions at Benjamini-Hochberg 0.05 level\n " ,
473
474
color= n_time_reg > 0 ? :red : :green )
474
475
printstyled (" \n $n_time_imp significant time improvments at Benjamini-Hochberg 0.05 level\n " ,
@@ -491,7 +492,7 @@ function benjamini_hochberg(pvals, alpha = 0.05)
491
492
end
492
493
493
494
function report_below_pvalue (df; col_prefix= " Fitness" , pvalue = 0.05 )
494
- selection = df[Symbol (col_prefix, " Pvalue" )] .< pvalue
495
+ selection = df[:, Symbol (col_prefix, " Pvalue" )] .< pvalue
495
496
log (" Num problems with $col_prefix p-values < $(pvalue) : $(sum (selection)) \n " )
496
497
# workaround sum(isequal, []) throws
497
498
num_new_worse = sum (df[selection, Symbol (col_prefix, " Order" )] .== " <" )
@@ -502,4 +503,4 @@ function report_below_pvalue(df; col_prefix="Fitness", pvalue = 0.05)
502
503
any (selection) && println (df[selection, :])
503
504
end
504
505
505
- @CPUtime main (ARGS )
506
+ @CPUtime main (ARGS )
0 commit comments