2 files changed (+8, -6 lines)
@@ -3,10 +3,10 @@
 from pathlib import Path
 
 import torch
+import transformers
 import typer
 import yaml
 from pydantic import ValidationError
-from transformers import utils as hf_utils
 from typing_extensions import Annotated
 
 import llmtune
@@ -20,8 +20,9 @@
 from llmtune.utils.save_utils import DirectoryHelper
 
 
-hf_utils.logging.set_verbosity_error()
+transformers.logging.set_verbosity(transformers.logging.CRITICAL)
 torch._logging.set_logs(all=logging.CRITICAL)
+logging.captureWarnings(True)
 
 
 app = typer.Typer()

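The net effect of the first file's change: the hf_utils alias is replaced by a plain "import transformers", the transformers verbosity is tightened from ERROR to CRITICAL, and logging.captureWarnings(True) additionally routes warnings.warn() calls through the standard logging machinery. A minimal sketch of that behaviour, mirroring the changed lines (my reading of the intent, not part of the patch):

# Sketch only: assumes transformers and torch are installed; names mirror the diff above.
import logging

import torch
import transformers

# Only CRITICAL-level messages from transformers (stricter than the previous
# set_verbosity_error(), which still allowed ERROR-level output).
transformers.logging.set_verbosity(transformers.logging.CRITICAL)

# Clamp torch's internal loggers to CRITICAL as well.
torch._logging.set_logs(all=logging.CRITICAL)

# Route warnings.warn() output through the logging module so it can be
# filtered together with other log records.
logging.captureWarnings(True)

print(transformers.logging.get_verbosity())  # 50 == logging.CRITICAL
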
@@ -23,7 +23,7 @@
 
 # Model Definition -------------------
 model:
-  hf_model_ckpt: "NousResearch/Llama-2-7b-hf"
+  hf_model_ckpt: "mistralai/Mistral-7B-Instruct-v0.2"
   torch_dtype: "bfloat16"
   # attn_implementation: "flash_attention_2"
   quantize: true
@@ -36,6 +36,7 @@
 lora:
   task_type: "CAUSAL_LM"
   r: 32
+  lora_alpha: 64
   lora_dropout: 0.1
   target_modules:
     - q_proj
@@ -49,12 +50,12 @@ lora:
 # Training -------------------
 training:
   training_args:
-    num_train_epochs: 5
+    num_train_epochs: 1
     per_device_train_batch_size: 4
     gradient_accumulation_steps: 4
     gradient_checkpointing: True
     optim: "paged_adamw_32bit"
-    logging_steps: 100
+    logging_steps: 1
     learning_rate: 2.0e-4
     bf16: true # Set to true for mixed precision training on Newer GPUs
     tf32: true
@@ -67,7 +68,7 @@ training:
     # neftune_noise_alpha: None
 
 inference:
-  max_new_tokens: 1024
+  max_new_tokens: 256
   use_cache: True
   do_sample: True
   top_p: 0.9
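In the config file, the base checkpoint moves from Llama-2-7b to mistralai/Mistral-7B-Instruct-v0.2, lora_alpha: 64 is made explicit (twice r: 32, i.e. a LoRA scaling factor of 2.0), and the run is shortened for quicker iteration: one epoch, per-step logging, and 256 generated tokens at inference instead of 1024. A rough sketch of how the r / lora_alpha pair typically feeds a PEFT LoraConfig, assuming llmtune passes these keys through unchanged (the target_modules list is abbreviated here):

# Illustration only: the exact wiring inside llmtune may differ.
from peft import LoraConfig

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,                       # LoRA rank
    lora_alpha=64,              # effective scaling = lora_alpha / r = 2.0
    lora_dropout=0.1,
    target_modules=["q_proj"],  # abbreviated; the config lists more modules
)
print(lora_config.lora_alpha / lora_config.r)  # 2.0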