jaseci-labs · Thamirawaran · Apr 4, 2025 · Copilot · Apr 4, 2025
diff --git a/scripts/Sample_ft.ipynb b/scripts/Sample_ft.ipynb
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prepare Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def convert_to_conversation(sample):\n",
+    "    \"\"\"Wrap one dataset sample as a two-turn chat record.\n",
+    "\n",
+    "    The user turn holds a fixed radiography prompt followed by\n",
+    "    sample['image']; the assistant turn holds sample['caption'].\n",
+    "    Returns a dict whose only key is 'messages'.\n",
+    "    \"\"\"\n",
+    "    prompt = \"You are an expert radiographer. Describe accurately what you see in this image.\"\n",
+    "    user_turn = {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": [\n",
+    "            {\"type\": \"text\", \"text\": prompt},\n",
+    "            {\"type\": \"image\", \"image\": sample[\"image\"]},\n",
+    "        ],\n",
+    "    }\n",
+    "    assistant_turn = {\n",
+    "        \"role\": \"assistant\",\n",
+    "        \"content\": [{\"type\": \"text\", \"text\": sample[\"caption\"]}],\n",
+    "    }\n",
+    "    return {\"messages\": [user_turn, assistant_turn]}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def datasplit(train_num, test_num):\n",
+    "    \"\"\"Load unsloth/Radiology_mini and keep the leading rows of each split.\n",
+    "\n",
+    "    Returns a tuple (converted_dataset, test_data): the first `train_num`\n",
+    "    training rows, each passed through convert_to_conversation, and the\n",
+    "    first `test_num` test rows left unconverted.\n",
+    "    \"\"\"\n",
+    "    from datasets import load_dataset\n",
+    "\n",
+    "    radiology = load_dataset(\"unsloth/Radiology_mini\")\n",
+    "    test_data = radiology[\"test\"].select(range(test_num))\n",
+    "    train_subset = radiology[\"train\"].select(range(train_num))\n",
+    "    # Show both subsets in the cell output, test split first.\n",
+    "    print(test_data)\n",
+    "    print(train_subset)\n",
+    "    converted_dataset = list(map(convert_to_conversation, train_subset))\n",
+    "    return converted_dataset, test_data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prepare model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from unsloth import FastVisionModel # FastLanguageModel for LLMs\n",
+    "import torch\n",
+    "def load_model():\n",
+    "    \"\"\"Load unsloth/Qwen2-VL-2B-Instruct and wrap it with LoRA adapters.\n",
+    "\n",
+    "    Returns the (model, tokenizer) pair, where the model is the result of\n",
+    "    FastVisionModel.get_peft_model applied to the freshly loaded model.\n",
+    "    \"\"\"\n",
+    "    base_model, tokenizer = FastVisionModel.from_pretrained(\n",
+    "        \"unsloth/Qwen2-VL-2B-Instruct\",\n",
+    "        load_in_4bit=False,  # 4-bit loading is switched off here\n",
+    "        use_gradient_checkpointing=\"unsloth\",\n",
+    "    )\n",
+    "    lora_options = dict(\n",
+    "        # Which module groups are fine-tuned: vision off; language,\n",
+    "        # attention and MLP on.\n",
+    "        finetune_vision_layers=False,\n",
+    "        finetune_language_layers=True,\n",
+    "        finetune_attention_modules=True,\n",
+    "        finetune_mlp_modules=True,\n",
+    "        # LoRA hyper-parameters; rank and alpha are kept equal.\n",
+    "        r=16,\n",
+    "        lora_alpha=16,\n",
+    "        lora_dropout=0,\n",
+    "        bias=\"none\",\n",
+    "        random_state=3407,\n",
+    "        use_rslora=False,\n",
+    "        loftq_config=None,\n",
+    "    )\n",
+    "    model = FastVisionModel.get_peft_model(base_model, **lora_options)\n",
+    "    return model, tokenizer"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Create Trainer object"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from unsloth import is_bf16_supported\n",
+    "from unsloth.trainer import UnslothVisionDataCollator\n",
+    "from trl import SFTTrainer, SFTConfig\n",
+    "\n",
+    "def prep_train(model, tokenizer, converted_dataset, num_step, num_epoch):\n",
+    "    \"\"\"Build an SFTTrainer for vision fine-tuning on `converted_dataset`.\n",
+    "\n",
+    "    The model is first switched to training mode. The run length is set\n",
+    "    through max_steps = num_step * num_epoch. The trainer is returned\n",
+    "    without starting training.\n",
+    "    \"\"\"\n",
+    "    FastVisionModel.for_training(model)\n",
+    "\n",
+    "    collator = UnslothVisionDataCollator(model, tokenizer)\n",
+    "    # Exactly one of fp16 / bf16 is enabled, depending on bf16 support.\n",
+    "    use_bf16 = is_bf16_supported()\n",
+    "    config = SFTConfig(\n",
+    "        per_device_train_batch_size=2,\n",
+    "        gradient_accumulation_steps=4,\n",
+    "        warmup_steps=5,\n",
+    "        max_steps=num_step * num_epoch,\n",
+    "        learning_rate=2e-4,\n",
+    "        fp16=not use_bf16,\n",
+    "        bf16=use_bf16,\n",
+    "        logging_steps=30,\n",
+    "        optim=\"adamw_8bit\",\n",
+    "        weight_decay=0.01,\n",
+    "        lr_scheduler_type=\"linear\",\n",
+    "        seed=3407,\n",
+    "        output_dir=\"outputs\",\n",
+    "        report_to=\"none\",\n",
+    "        # The remaining settings are the ones marked as required for\n",
+    "        # vision fine-tuning.\n",
+    "        remove_unused_columns=False,\n",
+    "        dataset_text_field=\"\",\n",
+    "        dataset_kwargs={\"skip_prepare_dataset\": True},\n",
+    "        dataset_num_proc=4,\n",
+    "        max_seq_length=2048,\n",
+    "    )\n",
+    "    return SFTTrainer(\n",
+    "        model=model,\n",
+    "        tokenizer=tokenizer,\n",
+    "        data_collator=collator,\n",
+    "        train_dataset=converted_dataset,\n",
+    "        args=config,\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Record starting GPU memory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def init_mem():\n",
+    "    \"\"\"Return the peak reserved GPU memory so far, in GiB.\n",
+    "\n",
+    "    The value of torch.cuda.max_memory_reserved() is divided by 1024\n",
+    "    three times and rounded to 3 decimals. It is meant to be captured\n",
+    "    before training so a later reading can be compared against it.\n",
+    "    \"\"\"\n",
+    "    return round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Get Memory Status"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_mem(trainer_stats, start_gpu_memory):\n",
+    "    \"\"\"Summarise training duration and peak GPU memory.\n",
+    "\n",
+    "    Returns a tuple (minutes, peak_gib, delta_gib):\n",
+    "    - minutes: trainer_stats.metrics['train_runtime'] / 60, 2 decimals;\n",
+    "    - peak_gib: torch.cuda.max_memory_reserved() in GiB, 3 decimals;\n",
+    "    - delta_gib: peak_gib minus `start_gpu_memory`, 3 decimals.\n",
+    "    \"\"\"\n",
+    "    peak_bytes = torch.cuda.max_memory_reserved()\n",
+    "    peak_gib = round(peak_bytes / 1024 / 1024 / 1024, 3)\n",
+    "    delta_gib = round(peak_gib - start_gpu_memory, 3)\n",
+    "    runtime_seconds = trainer_stats.metrics['train_runtime']\n",
+    "    minutes = round(runtime_seconds / 60, 2)\n",
+    "    return minutes, peak_gib, delta_gib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Get Response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_res(model, tokenizer, test_data):\n",
+    "    \"\"\"Generate a text description for every image in `test_data`.\n",
+    "\n",
+    "    Args:\n",
+    "        model: vision model; switched to inference mode before generating.\n",
+    "        tokenizer: used for the chat template, input encoding and decoding.\n",
+    "        test_data: iterable of rows, each with an 'image' entry.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict mapping each row index to the text decoded from the first\n",
+    "        generated sequence, with special tokens skipped.\n",
+    "        NOTE(review): the decoded text presumably still includes the\n",
+    "        prompt portion - confirm against the consumer of this dict.\n",
+    "    \"\"\"\n",
+    "    from transformers import TextStreamer  # imported once, outside the loop\n",
+    "\n",
+    "    FastVisionModel.for_inference(model) # Enable for inference!\n",
+    "\n",
+    "    # The prompt is the same for every image, so build and template it once.\n",
+    "    instruction = \"You are an expert radiographer. Describe accurately what you see in this image.\"\n",
+    "    messages = [\n",
+    "        {\"role\": \"user\", \"content\": [\n",
+    "            {\"type\": \"image\"},\n",
+    "            {\"type\": \"text\", \"text\": instruction}\n",
+    "        ]}\n",
+    "    ]\n",
+    "    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt = True)\n",
+    "\n",
+    "    response = {}\n",
+    "    for i, sample in enumerate(test_data):\n",
+    "        inputs = tokenizer(\n",
+    "            sample[\"image\"],\n",
+    "            input_text,\n",
+    "            add_special_tokens = False,\n",
+    "            return_tensors = \"pt\",\n",
+    "        ).to(\"cuda\")\n",
+    "\n",
+    "        # A fresh streamer per sample; skip_prompt leaves the prompt out of\n",
+    "        # the streamed text.\n",
+    "        text_streamer = TextStreamer(tokenizer, skip_prompt = True)\n",
+    "        output_ids = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,\n",
+    "                            use_cache = True, temperature = 1.5, min_p = 0.1)\n",
+    "        response[i] = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n",
+    "    return response"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Evaluate BERTScore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from bert_score import score as bert_score\n",
+    "\n",
+    "def evaluate(response, test_data):\n",
+    "    \"\"\"Score generated texts against reference captions with BERTScore.\n",
+    "\n",
+    "    Args:\n",
+    "        response: generated texts indexed 0 .. len(response) - 1\n",
+    "            (a dict keyed by row index, or a sequence).\n",
+    "        test_data: indexable rows with a 'caption' entry, aligned with\n",
+    "            `response` by index.\n",
+    "\n",
+    "    Returns:\n",
+    "        dict with keys 'BERT_Precision', 'BERT_Recall' and 'BERT_F1',\n",
+    "        each the mean over all samples. All three are NaN when\n",
+    "        `response` is empty.\n",
+    "    \"\"\"\n",
+    "    results = {}\n",
+    "    count = len(response)\n",
+    "    if count == 0:\n",
+    "        # Nothing to score: report NaN means without calling the scorer.\n",
+    "        results[\"BERT_Precision\"] = np.nan\n",
+    "        results[\"BERT_Recall\"] = np.nan\n",
+    "        results[\"BERT_F1\"] = np.nan\n",
+    "        return results\n",
+    "\n",
+    "    references = [test_data[i][\"caption\"] for i in range(count)]\n",
+    "    # Only the text after the first four lines of each response is scored.\n",
+    "    # NOTE(review): presumably those lines are the chat-template prompt\n",
+    "    # echoed back by decoding - confirm against the generation step.\n",
+    "    hypotheses = [\"\\n\".join(response[i].splitlines()[4:]) for i in range(count)]\n",
+    "\n",
+    "    # One batched call scores every pair, so the scorer is set up once\n",
+    "    # rather than once per sample.\n",
+    "    P, R, F1 = bert_score(hypotheses, references, lang=\"en\", verbose=False)\n",
+    "\n",
+    "    # Average the per-sample scores.\n",
+    "    results[\"BERT_Precision\"] = np.mean(P.tolist())\n",
+    "    results[\"BERT_Recall\"] = np.mean(R.tolist())\n",
+    "    results[\"BERT_F1\"] = np.mean(F1.tolist())\n",
+    "    return results\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cr",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}