
Commit dfa814d

Author: sandeepchittilla
Message: Update hyperparameters
Parent: 737a496

1 file changed (+3 -3 lines)


examples/dpo_ultrafeedback.py

Lines changed: 3 additions & 3 deletions
@@ -29,11 +29,11 @@
     ),
     model=ModelConfig(model_path="HuggingFaceH4/mistral-7b-sft-beta", num_layers_unfrozen=-1),
     tokenizer=TokenizerConfig(tokenizer_path="HuggingFaceH4/mistral-7b-sft-beta", truncation_side="right"),
-    optimizer=OptimizerConfig(name="adamw", kwargs=dict(lr=1e-6, betas=(0.9, 0.95), eps=1.0e-8, weight_decay=1.0e-6)),
+    optimizer=OptimizerConfig(name="adamw", kwargs=dict(lr=2e-5, betas=(0.9, 0.999), eps=1.0e-8, weight_decay=1.0e-6)),
     scheduler=SchedulerConfig(name="cosine_annealing", kwargs=dict(T_max=1e12, eta_min=1.0e-4)),  # train.total_steps
     method=DPOConfig(
         name="DPOConfig",
-        gen_kwargs=dict(max_new_tokens=40, top_k=20, top_p=1.0, do_sample=True),
+        gen_kwargs=dict(max_new_tokens=256, temperature=0.7, top_k=50, top_p=0.95, do_sample=True),
         beta=0.1,
         label_pad_token_id=-100,
         padding_value=0,
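The first hunk raises the AdamW learning rate from 1e-6 to 2e-5 (with beta2 back at the PyTorch default of 0.999) and swaps the short top-k evaluation decoding for longer nucleus sampling; the new gen_kwargs correspond one-to-one to transformers' generate() sampling arguments. Below is a minimal sketch of how the optimizer/scheduler kwargs resolve, assuming trlx's usual mapping of "adamw" and "cosine_annealing" onto torch.optim; the Linear module is a hypothetical stand-in for the policy model:

import torch

# Hypothetical stand-in; trlx would pass the policy model's parameters here.
model = torch.nn.Linear(8, 8)

# OptimizerConfig(name="adamw", kwargs=...) resolves to torch.optim.AdamW
# with the updated kwargs from the diff.
optimizer = torch.optim.AdamW(
    model.parameters(), lr=2e-5, betas=(0.9, 0.999), eps=1.0e-8, weight_decay=1.0e-6
)

# SchedulerConfig(name="cosine_annealing", kwargs=...) resolves to
# CosineAnnealingLR; the inline "# train.total_steps" comment marks
# T_max=1e12 as a placeholder for the real total step count.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=1e12, eta_min=1.0e-4
)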
@@ -59,7 +59,7 @@ def main(hparams={}):
     trlx.train(
         config=config,
         samples=dataset["train_prefs"]["dpo"],
-        eval_prompts=dataset["test_prefs"]["prompt"][:8],
+        eval_prompts=dataset["test_prefs"]["prompt"][:128],
         # metric_fn=lambda **kwargs: {"reward": reward_fn(**kwargs)},
         stop_sequences=["User:", "user:", "Assistant:", "assistant:"]
         + ["{e}x {i}put:".format(e=e, i=i) for e, i in itertools.product(["e", "E"], ["in", "In", "out", "Out"])],
