Skip to content

Commit 516c46c

Browse files
committed
zero rollout buffer
2 parents d411b3a + eebcfbf commit 516c46c

6 files changed

Lines changed: 285 additions & 130 deletions

File tree

cache_data.py

Lines changed: 38 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
#'tetris',
1313
#'g2048',
1414
#'moba',
15-
#'pong',
15+
'pong',
1616
#'tower_climb',
1717
#'grid',
1818
#'nmmo3',
@@ -43,12 +43,14 @@
4343
'vec/total_agents',
4444
]
4545

46-
ALL_KEYS = [
46+
METRICS = [
4747
'agent_steps',
48-
'cost',
48+
'uptime',
4949
'environment/score',
5050
'environment/perf'
51-
] + HYPERS
51+
]
52+
53+
ALL_KEYS = METRICS + HYPERS
5254

5355
def pareto_idx(steps, costs, scores):
5456
idxs = []
@@ -63,53 +65,46 @@ def pareto_idx(steps, costs, scores):
6365

6466
def load_sweep_data(path):
6567
data = {}
66-
keys = None
68+
sweep_metadata = {}
6769
for fpath in glob.glob(path):
6870
if 'cache.json' in fpath:
6971
continue
7072

7173
with open(fpath, 'r') as f:
7274
exp = json.load(f)
7375

74-
if not data:
75-
for kk in exp.keys():
76-
if kk == 'data':
77-
for k, v in exp[kk][-1].items():
78-
data[k] = []
79-
else:
80-
data[kk] = []
81-
82-
discard = False
83-
for kk in list(data.keys()):
84-
if kk not in exp and kk not in exp['data'][-1]:
85-
discard = True
86-
break
87-
88-
if discard:
89-
continue
76+
if not sweep_metadata:
77+
for k, v in exp.items():
78+
if k.startswith('sweep/'):
79+
sweep_metadata[k.replace('sweep/', '')] = v
80+
81+
for k, v in exp.items():
82+
if k.startswith('sweep/'):
83+
continue
84+
85+
if isinstance(v, dict):
86+
continue
87+
88+
if k not in data:
89+
data[k] = []
90+
91+
data[k].append(v)
9092

91-
for kk in list(data.keys()):
92-
if kk in exp:
93-
v = exp[kk]
94-
sweep_key = f'sweep/{kk}/distribution'
95-
if sweep_key in data and exp[sweep_key] == 'logit_normal':
96-
v = 1 - v
97-
elif kk in ('train/vtrace_rho_clip', 'train/vtrace_c_clip'):
98-
v = max(v, 0.1)
93+
for k in METRICS:
94+
if k not in data:
95+
data[k] = []
9996

100-
data[kk].append(v)
101-
else:
102-
data[kk].append(exp['data'][-1][kk])
97+
data[k].append(exp['data'][-1][k])
10398

10499
steps = data['agent_steps']
105-
costs = data['cost']
100+
costs = data['uptime']
106101
scores = data['environment/score']
107102

108103
idxs = pareto_idx(steps, costs, scores)
109104

110105
# Filter to pareto
111-
for k in data:
112-
data[k] = [data[k][i] for i in idxs]
106+
#for k in data:
107+
# data[k] = [data[k][i] for i in idxs]
113108

114109
# Monkey patch: Cap performance
115110
data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']]
@@ -119,6 +114,7 @@ def load_sweep_data(path):
119114
skip = data['env/frameskip']
120115
data['agent_steps'] = [n*m for n, m in zip(data['agent_steps'], skip)]
121116

117+
data['sweep'] = sweep_metadata
122118
return data
123119

124120
def cached_sweep_load(path, env_name):
@@ -136,20 +132,21 @@ def cached_sweep_load(path, env_name):
136132

137133
def compute_tsne():
138134
data = {name: cached_sweep_load(f'experiments/logs/puffer_{name}', name) for name in env_names}
135+
sweep_metadata = {name: data[name].pop('sweep') for name in env_names}
139136

140137
flat = []
141138
flat_mmin = []
142139
flat_mmax = []
143140
for env in env_names:
144141
flat.append(np.stack([data[env][hyper] for hyper in HYPERS], axis=1))
145-
flat_mmin.append(np.stack([data[env][f'sweep/{hyper}/min'] for hyper in HYPERS], axis=1))
146-
flat_mmax.append(np.stack([data[env][f'sweep/{hyper}/max'] for hyper in HYPERS], axis=1))
142+
flat_mmin.append(np.stack([sweep_metadata[env][f'{hyper}/min'] for hyper in HYPERS]))
143+
flat_mmax.append(np.stack([sweep_metadata[env][f'{hyper}/max'] for hyper in HYPERS]))
147144

148-
flat_distribution = [data[env][f'sweep/{hyper}/distribution'] for env in env_names for hyper in HYPERS]
145+
flat_distribution = [sweep_metadata[env][f'{hyper}/distribution'] for env in env_names for hyper in HYPERS]
149146

150147
flat = np.concatenate(flat, axis=0)
151-
flat_mmin = np.concatenate(flat_mmin, axis=0).min(axis=0)
152-
flat_mmax = np.concatenate(flat_mmax, axis=0).max(axis=0)
148+
flat_mmin = np.stack(flat_mmin, axis=0).min(axis=0)
149+
flat_mmax = np.stack(flat_mmax, axis=0).max(axis=0)
153150

154151
normed = flat.copy()
155152
for i in range(len(HYPERS)):
@@ -178,7 +175,7 @@ def compute_tsne():
178175
'''
179176
sz = len(data[env]['agent_steps'])
180177

181-
data[env] = {k: v for k, v in data[env].items() if k in ALL_KEYS}
178+
#data[env] = {k: v for k, v in data[env].items() if k in ALL_KEYS}
182179
if reduced is not None:
183180
data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
184181
data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()

pufferlib/config/ocean/grid.ini

Lines changed: 20 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -17,39 +17,37 @@ num_maps = 8192
1717
map_size = -1
1818

1919
[train]
20-
# Best params
21-
#total_timesteps = 435_000_000
22-
#adam_beta1 = 0.9801350114303844
23-
#adam_beta2 = 0.9931056135397744
24-
#adam_eps = 6.024885743259763e-8
25-
#clip_coef = 0.283658795325587
26-
#ent_coef = 0.007885530106105381
27-
#gae_lambda = 0.9574676436577135
28-
#gamma = 0.9961782334639131
29-
#learning_rate = 0.0007890771333884192
30-
#max_grad_norm = 2.5271346931510053
31-
#minibatch_size = 8192
32-
#prio_alpha = 0.8735470630752789
33-
#prio_beta0 = 0.6533958384978629
34-
#vf_clip_coef = 1.9338563232919095
35-
#vf_coef = 3.915248046963283
36-
#vtrace_c_clip = 1.018588814067991
37-
#vtrace_rho_clip = 2.4215244529216466
20+
#beta1 = 0.9581470335001424
21+
#beta2 = 0.9939188445458832
22+
#clip_coef = 0.4056152781082447
23+
#ent_coef = 0.0005788152954138715
24+
#eps = 0.00000000017742923522
25+
#gae_lambda = 0.9701506766299144
26+
#gamma = 0.986452277080654
27+
#learning_rate = 0.0029890509569117014
28+
#max_grad_norm = 3.235365412303803
29+
#minibatch_size = 32768
30+
#prio_alpha = 0.7936109272717734
31+
#prio_beta0 = 0.95653936979398
32+
#replay_ratio = 2.168572638529935
33+
#total_timesteps = 195748615
34+
#vf_clip_coef = 1.6903254428390564
35+
#vf_coef = 0.8650810043551426
36+
#vtrace_c_clip = 1.3763043127146155
37+
#vtrace_rho_clip = 4.674513875485618
3838

39-
# New sweep best params
4039
total_timesteps = 435_000_000
4140
beta1 = 0.9493079570168755
4241
beta2 = 0.9998213228757207
4342
eps = 2.16720639574209e-8
44-
horizon = 64
43+
bptt_horizon = 64
4544
clip_coef = 0.399530686596841
4645
ent_coef = 0.0017271288609381147
4746
gae_lambda = 0.9491722822649111
4847
gamma = 0.9877360824574745
4948
learning_rate = 0.0012892859713461897
5049
max_grad_norm = 3.016348031602564
51-
#minibatch_size = 8192
52-
minibatch_size = 32768
50+
minibatch_size = 8192
5351
prio_alpha = 0.8219794821639037
5452
prio_beta0 = 0.9447478232810274
5553
vf_clip_coef = 0.6051579400844748

pufferlib/config/ocean/pong.ini

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,6 @@ vf_coef = 1.6832989594296321
4343
vtrace_c_clip = 2.878171091654008
4444
vtrace_rho_clip = 0.7876748061547312
4545

46-
[sweep]
47-
downsample = 5
48-
max_cost = 10
49-
5046
[sweep.train.total_timesteps]
5147
distribution = log_normal
5248
min = 5e6

0 commit comments

Comments
 (0)