diff --git a/scripts/Sample_ft.ipynb b/scripts/Sample_ft.ipynb
new file mode 100644
index 0000000..5ebdb14
--- /dev/null
+++ b/scripts/Sample_ft.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prepare Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convert_to_conversation(sample):\n",
+    "    instruction = \"You are an expert radiographer. Describe accurately what you see in this image.\"\n",
+    "    conversation = [\n",
+    "        {\"role\": \"user\",\n",
+    "         \"content\": [\n",
+    "             {\"type\": \"text\", \"text\": instruction},\n",
+    "             {\"type\": \"image\", \"image\": sample[\"image\"]},\n",
+    "         ]},\n",
+    "        {\"role\": \"assistant\",\n",
+    "         \"content\": [\n",
+    "             {\"type\": \"text\", \"text\": sample[\"caption\"]},\n",
+    "         ]},\n",
+    "    ]\n",
+    "    return {\"messages\": conversation}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "def datasplit(train_num, test_num):\n",
+    "    dataset = load_dataset(\"unsloth/Radiology_mini\")\n",
+    "    test_data = dataset[\"test\"].select(range(test_num))\n",
+    "    train_data = dataset[\"train\"].select(range(train_num))\n",
+    "    print(test_data)\n",
+    "    print(train_data)\n",
+    "    converted_dataset = [convert_to_conversation(sample) for sample in train_data]\n",
+    "    return converted_dataset, test_data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prepare Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from unsloth import FastVisionModel  # use FastLanguageModel for text-only LLMs\n",
+    "import torch\n",
+    "\n",
+    "def load_model():\n",
+    "    model, tokenizer = FastVisionModel.from_pretrained(\n",
+    "        \"unsloth/Qwen2-VL-2B-Instruct\",\n",
+    "        load_in_4bit = False,  # Use 4bit to reduce memory use; False for 16bit LoRA\n",
+    "        use_gradient_checkpointing = \"unsloth\",  # True or \"unsloth\" for long context\n",
+    "    )\n",
+    "    model = FastVisionModel.get_peft_model(\n",
+    "        model,\n",
+    "        finetune_vision_layers = False,  # False if not finetuning vision layers\n",
+    "        finetune_language_layers = True,  # False if not finetuning language layers\n",
+    "        finetune_attention_modules = True,  # False if not finetuning attention layers\n",
+    "        finetune_mlp_modules = True,  # False if not finetuning MLP layers\n",
+    "\n",
+    "        r = 16,  # The larger, the higher the accuracy, but might overfit\n",
+    "        lora_alpha = 16,  # Recommended: at least alpha == r\n",
+    "        lora_dropout = 0,\n",
+    "        bias = \"none\",\n",
+    "        random_state = 3407,\n",
+    "        use_rslora = False,  # We support rank stabilized LoRA\n",
+    "        loftq_config = None,  # And LoftQ\n",
+    "        # target_modules = \"all-linear\",  # Optional now! Can specify a list if needed\n",
+    "    )\n",
+    "    return model, tokenizer"
+   ]
+  },
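+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optional sanity check (an illustrative sketch, not part of the original pipeline): `get_peft_model` returns a PEFT-wrapped model, which is assumed here to expose PEFT's `print_trainable_parameters()`, so you can confirm that only the LoRA adapters are trainable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative check -- assumes the PEFT wrapper exposes print_trainable_parameters().\n",
+    "# Uncomment to run after loading the model:\n",
+    "# model, tokenizer = load_model()\n",
+    "# model.print_trainable_parameters()  # trainable params should be a small fraction of the total"
+   ]
+  },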
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create Trainer Object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from unsloth import is_bf16_supported\n",
+    "from unsloth.trainer import UnslothVisionDataCollator\n",
+    "from trl import SFTTrainer, SFTConfig\n",
+    "\n",
+    "def prep_train(model, tokenizer, converted_dataset, num_step, num_epoch):\n",
+    "    FastVisionModel.for_training(model)  # Enable for training!\n",
+    "\n",
+    "    trainer = SFTTrainer(\n",
+    "        model = model,\n",
+    "        tokenizer = tokenizer,\n",
+    "        data_collator = UnslothVisionDataCollator(model, tokenizer),  # Must use!\n",
+    "        train_dataset = converted_dataset,\n",
+    "        args = SFTConfig(\n",
+    "            per_device_train_batch_size = 2,\n",
+    "            gradient_accumulation_steps = 4,\n",
+    "            warmup_steps = 5,\n",
+    "            max_steps = num_step * num_epoch,\n",
+    "            # num_train_epochs = 1,  # Set this instead of max_steps for full training runs\n",
+    "            learning_rate = 2e-4,\n",
+    "            fp16 = not is_bf16_supported(),\n",
+    "            bf16 = is_bf16_supported(),\n",
+    "            logging_steps = 30,\n",
+    "            optim = \"adamw_8bit\",\n",
+    "            weight_decay = 0.01,\n",
+    "            lr_scheduler_type = \"linear\",\n",
+    "            seed = 3407,\n",
+    "            output_dir = \"outputs\",\n",
+    "            report_to = \"none\",  # Set to \"wandb\" for Weights & Biases logging\n",
+    "\n",
+    "            # The settings below are REQUIRED for vision finetuning:\n",
+    "            remove_unused_columns = False,\n",
+    "            dataset_text_field = \"\",\n",
+    "            dataset_kwargs = {\"skip_prepare_dataset\": True},\n",
+    "            dataset_num_proc = 4,\n",
+    "            max_seq_length = 2048,\n",
+    "        ),\n",
+    "    )\n",
+    "    return trainer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Start Memory Tracking"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def init_mem():\n",
+    "    # Peak GPU memory reserved so far (GB) -- the pre-training baseline\n",
+    "    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+    "    return start_gpu_memory"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Get Memory Status"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_mem(trainer_stats, start_gpu_memory):\n",
+    "    # Peak reserved memory (GB) after training, and the portion used by the training run\n",
+    "    used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+    "    used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
+    "    min_time = round(trainer_stats.metrics[\"train_runtime\"] / 60, 2)  # runtime in minutes\n",
+    "    return min_time, used_memory, used_memory_for_lora"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Get Responses"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import TextStreamer\n",
+    "\n",
+    "def get_res(model, tokenizer, test_data):\n",
+    "    FastVisionModel.for_inference(model)  # Enable for inference!\n",
+    "    instruction = \"You are an expert radiographer. Describe accurately what you see in this image.\"\n",
+    "    response = {}\n",
+    "    for i in range(len(test_data)):\n",
+    "        image = test_data[i][\"image\"]\n",
+    "\n",
+    "        messages = [\n",
+    "            {\"role\": \"user\", \"content\": [\n",
+    "                {\"type\": \"image\"},\n",
+    "                {\"type\": \"text\", \"text\": instruction}\n",
+    "            ]}\n",
+    "        ]\n",
+    "        input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)\n",
+    "        inputs = tokenizer(\n",
+    "            image,\n",
+    "            input_text,\n",
+    "            add_special_tokens = False,\n",
+    "            return_tensors = \"pt\",\n",
+    "        ).to(\"cuda\")\n",
+    "\n",
+    "        text_streamer = TextStreamer(tokenizer, skip_prompt = True)\n",
+    "        output_ids = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,\n",
+    "                                    use_cache = True, temperature = 1.5, min_p = 0.1)\n",
+    "        # Decode only the newly generated tokens, so the echoed prompt does not\n",
+    "        # leak into the hypothesis passed to evaluation\n",
+    "        generated_ids = output_ids[0][inputs[\"input_ids\"].shape[1]:]\n",
+    "        response[i] = tokenizer.decode(generated_ids, skip_special_tokens = True)\n",
+    "    return response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluate BERTScore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bert_score import score as bert_score\n",
+    "\n",
+    "def evaluate(response, test_data):\n",
+    "    # Score all hypothesis/reference pairs in a single batched call; calling\n",
+    "    # bert_score once per sample reloads its scoring model on every iteration\n",
+    "    hypotheses = [response[i] for i in range(len(response))]\n",
+    "    references = [test_data[i][\"caption\"] for i in range(len(response))]\n",
+    "\n",
+    "    P, R, F1 = bert_score(hypotheses, references, lang=\"en\", verbose=False)\n",
+    "\n",
+    "    results = {\n",
+    "        \"BERT_Precision\": P.mean().item(),\n",
+    "        \"BERT_Recall\": R.mean().item(),\n",
+    "        \"BERT_F1\": F1.mean().item(),\n",
+    "    }\n",
+    "    return results\n"
+   ]
+  },
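+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal end-to-end sketch wiring the helpers above together. The sample sizes and step/epoch counts below are arbitrary illustrative choices, not values prescribed by this notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative driver -- sample sizes and step counts are arbitrary assumptions\n",
+    "converted_dataset, test_data = datasplit(train_num = 100, test_num = 10)\n",
+    "model, tokenizer = load_model()\n",
+    "\n",
+    "trainer = prep_train(model, tokenizer, converted_dataset, num_step = 30, num_epoch = 1)\n",
+    "start_gpu_memory = init_mem()\n",
+    "trainer_stats = trainer.train()\n",
+    "min_time, used_memory, used_memory_for_lora = get_mem(trainer_stats, start_gpu_memory)\n",
+    "print(f\"{min_time} min | {used_memory} GB peak reserved | {used_memory_for_lora} GB for training\")\n",
+    "\n",
+    "response = get_res(model, tokenizer, test_data)\n",
+    "print(evaluate(response, test_data))"
+   ]
+  }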
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cr",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}