-
Notifications
You must be signed in to change notification settings - Fork 803
Description
Describe the bug
An error occurs during validation (the evaluation step) of GRPO training.
train.sh:
swift rlhf \
    --rlhf_type grpo \
    --model "/data/Emotion/chenligen/Qwen2-Audio/model/Qwen2-Audio-7B-Instruct" \
    --output_dir "/data/Emotion/chenligen/Qwen2-Audio/output/prompt2" \
    --dataset "/data/Emotion/chenligen/Qwen2-Audio/data/train_prompt2.jsonl" \
    --val_dataset "/data/Emotion/chenligen/Qwen2-Audio/data/val_prompt2.jsonl" \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 2 \
    --generation_batch_size 64 \
    --num_train_epochs 5 \
    --learning_rate 1e-6 \
    --lr_scheduler_type cosine \
    --weight_decay 0.01 \
    --warmup_ratio 0.03 \
    --max_length 2048 \
    --max_completion_length 512 \
    --external_plugins "/data/Emotion/chenligen/Qwen2-Audio/plugin.py" \
    --reward_funcs reward_prompt1 \
    --reward_weights 1.0 \
    --truncation_strategy delete \
    --report_to tensorboard \
    --loss_type grpo \
    --remove_unused_column False \
    --log_completions True \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --save_total_limit 3 \
    --num_generations 8 \
    --temperature 1.0 \
    --kl_coef 0.04 \
    --dataloader_num_workers 4 \
    --dataset_num_proc 4
Your hardware and system info
swift 3.7.0, Python 3.10