1212 #'tetris',
1313 #'g2048',
1414 #'moba',
15- # 'pong',
15+ 'pong' ,
1616 #'tower_climb',
1717 #'grid',
1818 #'nmmo3',
4343 'vec/total_agents' ,
4444]
4545
46- ALL_KEYS = [
46+ METRICS = [
4747 'agent_steps' ,
48- 'cost ' ,
48+ 'uptime ' ,
4949 'environment/score' ,
5050 'environment/perf'
51- ] + HYPERS
51+ ]
52+
53+ ALL_KEYS = METRICS + HYPERS
5254
5355def pareto_idx (steps , costs , scores ):
5456 idxs = []
@@ -63,53 +65,46 @@ def pareto_idx(steps, costs, scores):
6365
6466def load_sweep_data (path ):
6567 data = {}
66- keys = None
68+ sweep_metadata = {}
6769 for fpath in glob .glob (path ):
6870 if 'cache.json' in fpath :
6971 continue
7072
7173 with open (fpath , 'r' ) as f :
7274 exp = json .load (f )
7375
74- if not data :
75- for kk in exp .keys ():
76- if kk == 'data' :
77- for k , v in exp [ kk ][ - 1 ]. items ():
78- data [ k ] = []
79- else :
80- data [ kk ] = []
81-
82- discard = False
83- for kk in list ( data . keys () ):
84- if kk not in exp and kk not in exp [ 'data' ][ - 1 ]:
85- discard = True
86- break
87-
88- if discard :
89- continue
76+ if not sweep_metadata :
77+ for k , v in exp .items ():
78+ if k . startswith ( 'sweep/' ) :
79+ sweep_metadata [ k . replace ( 'sweep/' , '' )] = v
80+
81+ for k , v in exp . items () :
82+ if k . startswith ( 'sweep/' ):
83+ continue
84+
85+ if isinstance ( v , dict ):
86+ continue
87+
88+ if k not in data :
89+ data [ k ] = []
90+
91+ data [ k ]. append ( v )
9092
91- for kk in list (data .keys ()):
92- if kk in exp :
93- v = exp [kk ]
94- sweep_key = f'sweep/{ kk } /distribution'
95- if sweep_key in data and exp [sweep_key ] == 'logit_normal' :
96- v = 1 - v
97- elif kk in ('train/vtrace_rho_clip' , 'train/vtrace_c_clip' ):
98- v = max (v , 0.1 )
93+ for k in METRICS :
94+ if k not in data :
95+ data [k ] = []
9996
100- data [kk ].append (v )
101- else :
102- data [kk ].append (exp ['data' ][- 1 ][kk ])
97+ data [k ].append (exp ['data' ][- 1 ][k ])
10398
10499 steps = data ['agent_steps' ]
105- costs = data ['cost ' ]
100+ costs = data ['uptime ' ]
106101 scores = data ['environment/score' ]
107102
108103 idxs = pareto_idx (steps , costs , scores )
109104
110105 # Filter to pareto
111- for k in data :
112- data [k ] = [data [k ][i ] for i in idxs ]
106+ # for k in data:
107+ # data[k] = [data[k][i] for i in idxs]
113108
114109 # Monkey patch: Cap performance
115110 data ['environment/perf' ] = [min (e , 1.0 ) for e in data ['environment/perf' ]]
@@ -119,6 +114,7 @@ def load_sweep_data(path):
119114 skip = data ['env/frameskip' ]
120115 data ['agent_steps' ] = [n * m for n , m in zip (data ['agent_steps' ], skip )]
121116
117+ data ['sweep' ] = sweep_metadata
122118 return data
123119
124120def cached_sweep_load (path , env_name ):
@@ -136,20 +132,21 @@ def cached_sweep_load(path, env_name):
136132
137133def compute_tsne ():
138134 data = {name : cached_sweep_load (f'experiments/logs/puffer_{ name } ' , name ) for name in env_names }
135+ sweep_metadata = {name : data [name ].pop ('sweep' ) for name in env_names }
139136
140137 flat = []
141138 flat_mmin = []
142139 flat_mmax = []
143140 for env in env_names :
144141 flat .append (np .stack ([data [env ][hyper ] for hyper in HYPERS ], axis = 1 ))
145- flat_mmin .append (np .stack ([data [env ][f'sweep/ { hyper } /min' ] for hyper in HYPERS ], axis = 1 ))
146- flat_mmax .append (np .stack ([data [env ][f'sweep/ { hyper } /max' ] for hyper in HYPERS ], axis = 1 ))
142+ flat_mmin .append (np .stack ([sweep_metadata [env ][f'{ hyper } /min' ] for hyper in HYPERS ]))
143+ flat_mmax .append (np .stack ([sweep_metadata [env ][f'{ hyper } /max' ] for hyper in HYPERS ]))
147144
148- flat_distribution = [data [env ][f'sweep/ { hyper } /distribution' ] for env in env_names for hyper in HYPERS ]
145+ flat_distribution = [sweep_metadata [env ][f'{ hyper } /distribution' ] for env in env_names for hyper in HYPERS ]
149146
150147 flat = np .concatenate (flat , axis = 0 )
151- flat_mmin = np .concatenate (flat_mmin , axis = 0 ).min (axis = 0 )
152- flat_mmax = np .concatenate (flat_mmax , axis = 0 ).max (axis = 0 )
148+ flat_mmin = np .stack (flat_mmin , axis = 0 ).min (axis = 0 )
149+ flat_mmax = np .stack (flat_mmax , axis = 0 ).max (axis = 0 )
153150
154151 normed = flat .copy ()
155152 for i in range (len (HYPERS )):
@@ -178,7 +175,7 @@ def compute_tsne():
178175 '''
179176 sz = len (data [env ]['agent_steps' ])
180177
181- data [env ] = {k : v for k , v in data [env ].items () if k in ALL_KEYS }
178+ # data[env] = {k: v for k, v in data[env].items() if k in ALL_KEYS}
182179 if reduced is not None :
183180 data [env ]['tsne1' ] = reduced [row :row + sz , 0 ].tolist ()
184181 data [env ]['tsne2' ] = reduced [row :row + sz , 1 ].tolist ()
0 commit comments