Commit daa9a0a

Merge pull request #3 from converged-computing/test-quicksilver
Quicksilver results
2 parents: abea9d7 + 79e13f3

21 files changed (+10579, -84 lines)
Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
#!/usr/bin/env python3

import argparse
import os
import sys
import re

import matplotlib.pylab as plt
import seaborn as sns

here = os.path.dirname(os.path.abspath(__file__))
analysis_root = os.path.dirname(here)
root = os.path.dirname(analysis_root)
sys.path.insert(0, analysis_root)

import performance_study as ps

sns.set_theme(style="whitegrid", palette="muted")


def get_parser():
    parser = argparse.ArgumentParser(
        description="Run analysis",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    parser.add_argument(
        "--root",
        help="root directory with experiments",
        default=os.path.join(root, "experiments"),
    )
    parser.add_argument(
        "--non-anon",
        help="Generate non-anon",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--out",
        help="directory to save parsed results",
        default=os.path.join(here, "data"),
    )
    return parser


def main():
    """
    Find application result files to parse.
    """
    parser = get_parser()
    args, _ = parser.parse_known_args()

    # Output images and data
    outdir = os.path.abspath(args.out)
    indir = os.path.abspath(args.root)

    # We absolutely want on premises results here
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Find input files (skip anything with test)
    files = ps.find_inputs(indir, "quicksilver")
    if not files:
        raise ValueError(f"There are no input files in {indir}")

    # Saves raw data to file
    df = parse_data(indir, outdir, files)
    plot_results(df, outdir, args.non_anon)


def parse_data(indir, outdir, files):
    """
    Parse filepaths for environment, etc., and results files for data.
    """
    # Metrics here will be figures of merit, and seconds runtime
    p = ps.ResultParser("quicksilver")

    # For flux we can save jobspecs and other event data
    data = {}

    # It's important to parse raw data just once, and then use the intermediate results
    for filename in files:
        exp = ps.ExperimentNameParser(filename, indir)
        if exp.prefix not in data:
            data[exp.prefix] = []

        # Set the parsing context for the result data frame
        p.set_context(exp.cloud, exp.env, exp.env_type, exp.size)

        # Sanity check the files we found
        if "single-thread" in filename:
            print(f"Skipping {filename}")
            continue
        print(filename)
        exp.show()

        item = ps.read_file(filename)
        jobs = ps.parse_flux_jobs(item)

        for job, metadata in jobs.items():
            # Let's just parse the figure of merit for now, e.g.,
            # Figure Of Merit 9.519e+08 [Num Segments / Cycle Tracking Time]
            fom = [x for x in metadata["log"].split("\n") if x.startswith("Figure Of Merit")]
            if not fom:
                print(
                    f"Filename {filename} is missing a figure of merit - likely did not finish."
                )
                continue
            fom = float([x for x in fom[0].split(" ") if x][3])
            p.add_result("duration", metadata["duration"])
            p.add_result("num_segments_over_cycle_tracking_time", fom)

    print("Done parsing quicksilver results!")

    # Save stuff to file first (note that "jobs" holds the jobs from the last parsed file)
    p.df.to_csv(os.path.join(outdir, "quicksilver-results.csv"))
    ps.write_json(jobs, os.path.join(outdir, "flux-jobs-and-events.json"))
    return p.df


def plot_results(df, outdir, non_anon=False):
    """
    Plot analysis results
    """
    # Let's get some shoes! Err, plots.
    # Make an image outdir
    img_outdir = os.path.join(outdir, "img")
    if not os.path.exists(img_outdir):
        os.makedirs(img_outdir)

    # TODO do we want to do this?
    # ps.print_experiment_cost(df, outdir)

    # We are going to put the plots together, and the colors need to match!
    cloud_colors = {}
    for cloud in df.experiment.unique():
        cloud_colors[cloud] = ps.match_color(cloud)

    # Within a setup, compare between experiments for GPU and CPU
    frames = {}
    for env in df.env_type.unique():
        subset = df[df.env_type == env]

        # Make a plot for seconds runtime, and each FOM set.
        # We can look at the metric across sizes, colored by experiment
        for metric in subset.metric.unique():
            metric_df = subset[subset.metric == metric]
            title = " ".join([x.capitalize() for x in metric.split("_")])
            frames[metric] = {"cpu": metric_df}

    for metric, data_frames in frames.items():
        # We only have one for now :)
        fig = plt.figure(figsize=(9, 3.3))
        gs = plt.GridSpec(1, 2, width_ratios=[2, 1])
        axes = []
        axes.append(fig.add_subplot(gs[0, 0]))
        axes.append(fig.add_subplot(gs[0, 1]))

        sns.set_style("whitegrid")
        sns.barplot(
            data_frames["cpu"],
            ax=axes[0],
            x="nodes",
            y="value",
            hue="experiment",
            err_kws={"color": "darkred"},
            hue_order=[
                "google/gke/cpu",
            ],
            palette=cloud_colors,
            order=[4, 8, 16, 32, 64, 128],
        )
        if "segment" not in metric:
            axes[0].set_title(f"Quicksilver {metric.capitalize()} (CPU)", fontsize=14)
            axes[0].set_ylabel("Seconds", fontsize=14)
        else:
            axes[0].set_title("Quicksilver Segments Over Cycle Tracking Time (CPU)", fontsize=12)
            axes[0].set_ylabel("Segments", fontsize=14)
        axes[0].set_xlabel("Nodes", fontsize=14)

        handles, labels = axes[0].get_legend_handles_labels()
        labels = ["/".join(x.split("/")[0:2]) for x in labels]
        axes[1].legend(
            handles, labels, loc="center left", bbox_to_anchor=(-0.1, 0.5), frameon=False
        )
        for ax in axes[0:1]:
            ax.get_legend().remove()
        axes[1].axis("off")

        plt.tight_layout()
        plt.savefig(os.path.join(img_outdir, f"quicksilver-{metric}-cpu.svg"))
        plt.savefig(os.path.join(img_outdir, f"quicksilver-{metric}-cpu.png"))
        plt.clf()

        # Print the total number of data points
        print(f'Total number of CPU data points: {data_frames["cpu"].shape[0]}')


if __name__ == "__main__":
    main()
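
For reference, a minimal standalone sketch of the figure-of-merit parsing done in parse_data above, applied to the example line quoted in the code comment. This snippet is illustrative only (not part of the committed file) and mirrors the script's token-splitting logic:

# Extract the FOM value the same way the script does, from the example log line
log = "Figure Of Merit 9.519e+08 [Num Segments / Cycle Tracking Time]"
fom_lines = [x for x in log.split("\n") if x.startswith("Figure Of Merit")]
fom = float([x for x in fom_lines[0].split(" ") if x][3])
print(fom)  # 951900000.0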

analysis/quicksilver/data/flux-jobs-and-events.json

Lines changed: 822 additions & 0 deletions
Large diffs are not rendered by default.
