
Commit 20be6e2

Merge branch 'main' into fix-generate
2 parents: 7b96c45 + 98e1ad4

File tree

2 files changed: +8 -6 lines


llmtune/cli/toolkit.py

Lines changed: 3 additions & 2 deletions
@@ -3,10 +3,10 @@
 from pathlib import Path
 
 import torch
+import transformers
 import typer
 import yaml
 from pydantic import ValidationError
-from transformers import utils as hf_utils
 from typing_extensions import Annotated
 
 import llmtune
@@ -20,8 +20,9 @@
 from llmtune.utils.save_utils import DirectoryHelper
 
 
-hf_utils.logging.set_verbosity_error()
+transformers.logging.set_verbosity(transformers.logging.CRITICAL)
 torch._logging.set_logs(all=logging.CRITICAL)
+logging.captureWarnings(True)
 
 
 app = typer.Typer()
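
For context, the quieter logging setup introduced in this hunk can be exercised on its own. The following is a minimal standalone sketch that only restates the calls visible in the diff; note that torch._logging is a private API, so its behaviour may differ between PyTorch releases.

# Minimal sketch of the logging setup added in this commit.
import logging

import torch
import transformers

# Replaces the removed hf_utils.logging.set_verbosity_error() with the
# stricter CRITICAL threshold on transformers' logger.
transformers.logging.set_verbosity(transformers.logging.CRITICAL)

# Raise all torch logging categories to CRITICAL as well
# (torch._logging.set_logs is a private API and may change).
torch._logging.set_logs(all=logging.CRITICAL)

# Route warnings.warn() output through the logging module so the same
# filtering applies to library warnings.
logging.captureWarnings(True)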

llmtune/config.yml

Lines changed: 5 additions & 4 deletions
@@ -23,7 +23,7 @@ data:
 
 # Model Definition -------------------
 model:
-  hf_model_ckpt: "NousResearch/Llama-2-7b-hf"
+  hf_model_ckpt: "mistralai/Mistral-7B-Instruct-v0.2"
   torch_dtype: "bfloat16"
   #attn_implementation: "flash_attention_2"
   quantize: true
@@ -36,6 +36,7 @@ model:
 lora:
   task_type: "CAUSAL_LM"
   r: 32
+  lora_alpha: 64
   lora_dropout: 0.1
   target_modules:
     - q_proj
@@ -49,12 +50,12 @@ lora:
 # Training -------------------
 training:
   training_args:
-    num_train_epochs: 5
+    num_train_epochs: 1
     per_device_train_batch_size: 4
     gradient_accumulation_steps: 4
     gradient_checkpointing: True
     optim: "paged_adamw_32bit"
-    logging_steps: 100
+    logging_steps: 1
     learning_rate: 2.0e-4
     bf16: true # Set to true for mixed precision training on Newer GPUs
     tf32: true
@@ -67,7 +68,7 @@ training:
     # neftune_noise_alpha: None
 
 inference:
-  max_new_tokens: 1024
+  max_new_tokens: 256
   use_cache: True
   do_sample: True
   top_p: 0.9
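
For illustration, the edited lora: and training: values map directly onto peft and transformers objects. The sketch below assumes a plain yaml load of this file; the file path, the output_dir value, and the variable names are hypothetical and not the project's actual wiring.

# Hypothetical sketch: how the values edited above could be consumed.
# The config path and output_dir are assumptions for illustration only.
import yaml
from peft import LoraConfig
from transformers import TrainingArguments

with open("llmtune/config.yml") as f:
    config = yaml.safe_load(f)

# lora: block -> PEFT LoraConfig (lora_alpha: 64 is the key added here).
lora_config = LoraConfig(
    task_type=config["lora"]["task_type"],
    r=config["lora"]["r"],
    lora_alpha=config["lora"]["lora_alpha"],
    lora_dropout=config["lora"]["lora_dropout"],
    target_modules=config["lora"]["target_modules"],
)

# training.training_args -> transformers TrainingArguments; with
# num_train_epochs: 1 and logging_steps: 1 this is a quick smoke-test run.
training_args = TrainingArguments(
    output_dir="./experiment",
    **config["training"]["training_args"],
)

The inference: block likewise holds keyword arguments (max_new_tokens, use_cache, do_sample, top_p) that can be passed through to model.generate().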
