Skip to content

Commit c212249

Browse files
committed
CI: Improve plots generated by performance check script
- Group benchmarks by category
- Left-align legend, one entry per line
- Don't create images for categories without changes ("No Data")
- Support newly added and removed benchmarks
1 parent 6a2b416 commit c212249

File tree

1 file changed

+133
-53
lines changed

1 file changed

+133
-53
lines changed

ci/check-perf-impact.rb

Lines changed: 133 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,64 @@
1212

1313
require 'csv'
1414
require 'gruff'
15-
require 'json'
1615
require 'digest'
1716

17+
# Monkey patch Gruff to allow left-aligning labels in the legend; only allow one entry per line.
18+
module Gruff
  class Base
    # Draws the legend as a single left-aligned column, one entry per line.
    #
    # Every series whose label is non-empty gets one row: the label text
    # (anchored with Magick::WestGravity) and, to its left, a square filled
    # with the series color. Series with an empty label are skipped entirely
    # and do not consume a row. Labels are normalized with +to_s+ so that a
    # nil label is treated like an empty one instead of raising.
    #
    # Does nothing when the legend is hidden.
    def draw_legend
      return if @hide_legend

      box_size = @legend_box_size # side length of the small colored square of each entry
      line_height = [legend_caps_height, box_size].max + @legend_margin

      x_offset = @left_margin
      y_offset =
        if @legend_at_bottom
          x_axis_label_height = @x_axis_label ? (@label_margin * 2) + marker_caps_height : 0
          @graph_bottom + @legend_margin + labels_caps_height + @label_margin + x_axis_label_height
        elsif hide_title?
          @top_margin + @title_margin
        else
          @top_margin + @title_margin + title_caps_height
        end

      store.data.each do |series|
        label = series.label.to_s
        next if label.empty?

        label_width = calculate_width(@legend_font, label)

        # Queue the label text; WestGravity is what makes it left-aligned.
        text_renderer = Gruff::Renderer::Text.new(renderer, label, font: @legend_font)
        text_renderer.add_to_render_queue(label_width,
                                          box_size,
                                          x_offset + (box_size * 1.7),
                                          y_offset,
                                          Magick::WestGravity)

        # Draw the box with the color of this dataset.
        rect_renderer = Gruff::Renderer::Rectangle.new(renderer, color: series.color)
        rect_renderer.render(x_offset,
                             y_offset,
                             x_offset + box_size,
                             y_offset + box_size)

        # Only entries that were actually drawn advance to the next row.
        y_offset += line_height
      end
    end

    # Returns the vertical space (as a Float) taken up by the legend rows.
    #
    # Only series with a non-empty label are counted, because those are the
    # only ones draw_legend renders. The script plots two series per benchmark
    # (before / after) and labels just one of them, so counting the visible
    # labels replaces the former hard-coded "divide by two" and also stays
    # correct when that pairing does not hold.
    #
    # Returns 0.0 when the legend is hidden or there is nothing to draw, so
    # the result can never become negative.
    def calculate_legend_height
      return 0.0 if @hide_legend

      visible_rows = store.data.count { |series| !series.label.to_s.empty? }
      return 0.0 if visible_rows.zero?

      line_height = [legend_caps_height, @legend_box_size].max

      # n rows are separated by n - 1 margins.
      ((line_height * visible_rows) + (@legend_margin * (visible_rows - 1))).to_f
    end
  end
end
72+
1873
# information regarding the benchmark file
1974
BENCH_FN = 'ci/perf/gpuc2_bench.csv'
2075
NAME_COL_1 = "test case" # first name column
@@ -42,7 +97,8 @@
4297
MAX_BENCHMARKS_TO_LIST = 3 # if more than this number of benchmarks is affected, just report the count
4398

4499
# file name for the message that will be posted
45-
MESSAGE_FN = "#{ENV['GITHUB_WORKSPACE']}/check_perf_message.txt"
100+
MESSAGE_DIR = "#{ENV['GITHUB_WORKSPACE']}/"
101+
MESSAGE_FN = "check_perf_message.txt"
46102

47103
# check if the expected files and env variables are present
48104
if !File.exist?(BENCH_FN)
@@ -68,7 +124,7 @@
68124
if bench_file_digest == $1
69125
puts "Same csv already processed, early exit"
70126
`echo "done=true" >> $GITHUB_OUTPUT`
71-
exit
127+
exit
72128
end
73129
end
74130

@@ -161,13 +217,12 @@ def get_wheel_color
161217
cur_chart_start_mean = 0
162218
cur_chart_idx = 0
163219
cur_img_idx = 0
220+
cur_chart_category = nil
221+
consecutive_category_count = 1
164222
g = nil
165-
prev_mean = 1
166223

167224
# closure for completing the current in-progress chart
168225
finish_chart = Proc.new do
169-
# generate a usable number of subdivisions
170-
g.y_axis_increment = prev_mean / 7
171226
# generate image
172227
img = g.to_image()
173228
# if there was a significant change, add border to image
@@ -183,64 +238,85 @@ def get_wheel_color
183238
in_chart = false
184239
end
185240

186-
old_data_map.sort_by { |k,v| mean(v) }.each do |bench_key, old_bench_raw|
187-
# skip deleted benchmarks
188-
next unless new_data_map.key?(bench_key)
189-
241+
all_keys = old_data_map.keys | new_data_map.keys
242+
all_keys.each do |bench_key|
190243
# gather some important information
191-
new_bench_raw = new_data_map[bench_key]
244+
old_bench_raw = old_data_map[bench_key] || nil
245+
new_bench_raw = new_data_map[bench_key] || nil
246+
is_new_or_removed = old_bench_raw.nil? || new_bench_raw.nil?
192247
bench_category = bench_key[0]
193248
bench_name = bench_key[1]
194-
195-
# finish the current chart if we have reached the maximum per image
196-
# or if the relative y axis difference becomes too large
197-
if in_chart && (cur_chart_start_mean < mean(old_bench_raw) / 20 ||
198-
cur_chart_idx >= MAX_CHARTS_PER_IMAGE)
199-
finish_chart.()
200-
end
201-
202-
# start a new chart
203-
if !in_chart
204-
g = Gruff::Box.new(GRAPH_WIDTH)
205-
g.theme_pastel
206-
g.hide_title = true
207-
g.marker_font_size = 15
208-
g.legend_at_bottom = true
209-
g.legend_font_size = 9
210-
g.legend_box_size = 10
211-
g.legend_margin = 2
212-
g.y_axis_label = "Time (nanoseconds)"
213-
214-
in_chart = true
215-
significant_perf_improvement_in_this_chart = false
216-
significant_perf_reduction_in_this_chart = false
217-
cur_chart_start_mean = mean(old_bench_raw)
218-
cur_chart_idx = 0
249+
if old_bench_raw.nil?
250+
bench_name = "ADDED: " + bench_name
251+
elsif new_bench_raw.nil?
252+
bench_name = "REMOVED: " + bench_name
219253
end
254+
# If both old and new data is available, we default to old - this is an arbitrary choice
255+
bench_mean = old_bench_raw.nil? ? mean(new_bench_raw) : mean(old_bench_raw)
220256

221257
# check if there was a highly significant difference
222-
new_median = median(scalar_add(new_bench_raw, FLAT_THRESHOLD_OFFSET))
223-
old_median = median(scalar_add(old_bench_raw, FLAT_THRESHOLD_OFFSET))
224-
rel_difference = new_median / old_median
225-
relative_times_per_category[bench_category] << rel_difference
226-
# we output these for easy inspection in the CI log
227-
puts "%3.2f <= %s" % [rel_difference, bench_name]
228-
if rel_difference > THRESHOLD_SLOW
229-
significantly_slower_benchmarks << bench_name
230-
significant_perf_reduction_in_this_chart = true
231-
elsif rel_difference < THRESHOLD_FAST
232-
significantly_faster_benchmarks << bench_name
233-
significant_perf_improvement_in_this_chart = true
258+
rel_difference = 0
259+
if !is_new_or_removed
260+
new_median = median(scalar_add(new_bench_raw, FLAT_THRESHOLD_OFFSET))
261+
old_median = median(scalar_add(old_bench_raw, FLAT_THRESHOLD_OFFSET))
262+
rel_difference = new_median / old_median
263+
relative_times_per_category[bench_category] << rel_difference
264+
# we output these for easy inspection in the CI log
265+
puts "%3.2f <= %s" % [rel_difference, bench_name]
266+
if rel_difference > THRESHOLD_SLOW
267+
significantly_slower_benchmarks << bench_name
268+
elsif rel_difference < THRESHOLD_FAST
269+
significantly_faster_benchmarks << bench_name
270+
end
234271
end
235272

236273
# add old and new boxes to chart if they are significant according to the charting thresholds
237-
if rel_difference > MINOR_THRESHOLD_SLOW || rel_difference < MINOR_THRESHOLD_FAST
274+
if is_new_or_removed || rel_difference > MINOR_THRESHOLD_SLOW || rel_difference < MINOR_THRESHOLD_FAST
275+
# finish current chart if the category has changed, the relative difference on the y-axis is too large,
276+
# or we've reached the maximum number of plots per image
277+
ratio = [cur_chart_start_mean, bench_mean].max / [cur_chart_start_mean, bench_mean].min
278+
if in_chart && (cur_chart_category != bench_category || ratio > 50 || cur_chart_idx >= MAX_CHARTS_PER_IMAGE)
279+
finish_chart.()
280+
end
281+
282+
# start a new chart if necessary
283+
if !in_chart
284+
g = Gruff::Box.new(GRAPH_WIDTH)
285+
g.theme_pastel
286+
g.title = bench_category
287+
g.marker_font_size = 15
288+
g.legend_at_bottom = true
289+
g.legend_font_size = 9
290+
g.legend_box_size = 10
291+
g.legend_margin = 2
292+
g.y_axis_label = "Time (nanoseconds)"
293+
g.stroke_width = 0.5
294+
295+
in_chart = true
296+
significant_perf_improvement_in_this_chart = false
297+
significant_perf_reduction_in_this_chart = false
298+
cur_chart_start_mean = bench_mean
299+
cur_chart_idx = 0
300+
if cur_chart_category == bench_category
301+
consecutive_category_count += 1
302+
g.title = bench_category + " (#{consecutive_category_count})"
303+
else
304+
consecutive_category_count = 1
305+
end
306+
cur_chart_category = bench_category
307+
end
308+
309+
if !is_new_or_removed && rel_difference > THRESHOLD_SLOW
310+
significant_perf_reduction_in_this_chart = true
311+
elsif !is_new_or_removed && rel_difference < THRESHOLD_FAST
312+
significant_perf_improvement_in_this_chart = true
313+
end
314+
315+
# plot the data
238316
g.data bench_name, old_bench_raw, get_wheel_color
239317
g.data nil, new_bench_raw, get_wheel_color
240318
cur_chart_idx += 1
241319
end
242-
243-
prev_mean = mean(old_bench_raw)
244320
end
245321
# don't forget to finish the last chart!
246322
finish_chart.()
@@ -293,7 +369,11 @@ def report_benchmark_list(list)
293369
end
294370
end
295371
end
296-
puts message
372+
puts "\n" + message
297373

298374
# write message to workspace file for subsequent step
299-
File.write(MESSAGE_FN, message)
375+
if File.writable?(MESSAGE_DIR)
376+
File.write(MESSAGE_DIR + MESSAGE_FN, message)
377+
else
378+
puts "\nCannot write to '#{MESSAGE_DIR}', skipping message generation"
379+
end

0 commit comments

Comments
 (0)