From 2fa561895f202bb91b0df257c5b6398b6f268267 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Thu, 20 Feb 2025 14:19:51 +0000 Subject: [PATCH 01/14] Add lane segmentation model and dataset implementation --- src/.keep | 0 src/OpenCV/Dataset.py | 92 ++++++++++++++++++++++++++++++++++++++++++ src/OpenCV/Model.py | 14 +++++++ src/OpenCV/main.py | 37 +++++++++++++++++ src/OpenCV/training.py | 75 ++++++++++++++++++++++++++++++++++ src/OpenCV/utils.py | 74 +++++++++++++++++++++++++++++++++ 6 files changed, 292 insertions(+) delete mode 100644 src/.keep create mode 100644 src/OpenCV/Dataset.py create mode 100644 src/OpenCV/Model.py create mode 100644 src/OpenCV/main.py create mode 100644 src/OpenCV/training.py create mode 100644 src/OpenCV/utils.py diff --git a/src/.keep b/src/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/src/OpenCV/Dataset.py b/src/OpenCV/Dataset.py new file mode 100644 index 0000000..3479144 --- /dev/null +++ b/src/OpenCV/Dataset.py @@ -0,0 +1,92 @@ +import os +import json +import cv2 +import numpy as np +import torch +import torchvision.transforms as transforms +from torch.utils.data import Dataset + +def get_binary_labels(height, width, pts, thickness=5): + bin_img = np.zeros(shape=[height, width], dtype=np.uint8) + for lane in pts: + cv2.polylines( + bin_img, + np.int32([lane]), + isClosed=False, + color=1, + thickness=thickness) + return bin_img.astype(np.float32)[None, ...] 
+ +def get_image_transform(): + normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + t = [transforms.ToTensor(), + normalizer] + + transform = transforms.Compose(t) + return transform + +class TuSimpleDataset(Dataset): + def __init__(self, json_paths, img_dir, width=512, height=256, + thickness=5, max_lanes=4): + self.samples = [] + self.width = width + self.height = height + self.thickness = thickness + self.max_lanes = max_lanes + self.img_dir = img_dir + self.transform = get_image_transform() + + # Load all samples from JSON files + for json_path in json_paths: + with open(json_path, 'r') as f: + for line in f: + self.samples.append(json.loads(line)) + + def __len__(self): + return len(self.samples) + + def __getitem__(self, idx): + info = self.samples[idx] + file_path = os.path.join(self.img_dir, info['raw_file']) + + # Read and resize image + image = cv2.imread(file_path) + if image is None: + raise ValueError(f"Could not read image: {file_path}") + + width_org = image.shape[1] + height_org = image.shape[0] + image = cv2.resize(image, (self.width, self.height)) + + # Process lane points + x_lanes = info['lanes'] + y_samples = info['h_samples'] + + # Create points list with list comprehension + pts = [ + [(x, y) for (x, y) in zip(lane, y_samples) if x >= 0] + for lane in x_lanes + ] + + # Remove empty lanes + pts = [l for l in pts if len(l) > 0] + + # Calculate scaling rates + x_rate = 1.0 * self.width / width_org + y_rate = 1.0 * self.height / height_org + + # Scale points + pts = [[(int(round(x*x_rate)), int(round(y*y_rate))) + for (x, y) in lane] for lane in pts] + + # Generate labels + bin_labels = get_binary_labels(self.height, self.width, pts, + thickness=self.thickness) + + image = self.transform(image) + + bin_labels = torch.Tensor(bin_labels) + + return image, bin_labels \ No newline at end of file diff --git a/src/OpenCV/Model.py b/src/OpenCV/Model.py new file mode 100644 index 0000000..359a4af --- /dev/null 
+++ b/src/OpenCV/Model.py @@ -0,0 +1,14 @@ +import torch +import torch.nn as nn +import torchvision.models.segmentation as models +from torchvision.models.segmentation import DeepLabV3_ResNet50_Weights + +class LaneSegmentationModel(nn.Module): + def __init__(self, num_classes=1): + super(LaneSegmentationModel, self).__init__() + weights = DeepLabV3_ResNet50_Weights.DEFAULT + self.model = models.deeplabv3_resnet50(weights=weights) + self.model.classifier[4] = nn.Conv2d(256, num_classes, kernel_size=1) + + def forward(self, x): + return self.model(x)['out'] \ No newline at end of file diff --git a/src/OpenCV/main.py b/src/OpenCV/main.py new file mode 100644 index 0000000..9825dfc --- /dev/null +++ b/src/OpenCV/main.py @@ -0,0 +1,37 @@ +import cv2 +import numpy as np +import torch +import glob +from Model import LaneSegmentationModel +from Dataset import get_image_transform +from utils import visualize_output_batch + +# Initialize model +device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") +print(f"Using device: {device}") +model = LaneSegmentationModel().to(device) +model.load_state_dict(torch.load('lane_segmentation.pth', map_location=device)) +model.eval() + +image_dir = "assets/images" # Update this to your images directory +image_paths = glob.glob(f"{image_dir}/*.jpg") + glob.glob(f"{image_dir}/*.png") + +for img_path in image_paths: + # Read image + frame = cv2.imread(img_path) + if frame is None: + print(f"Could not read image: {img_path}") + continue + + height, width, _ = frame.shape + + image = cv2.resize(frame, (256, 512)) + transform = get_image_transform() + image = transform(image).unsqueeze(0).to(device) + + with torch.no_grad(): + outputs = model(image) + + visualize_output_batch(image, outputs) + +cv2.destroyAllWindows() \ No newline at end of file diff --git a/src/OpenCV/training.py b/src/OpenCV/training.py new file mode 100644 index 0000000..05282bb --- /dev/null +++ b/src/OpenCV/training.py @@ -0,0 +1,75 @@ +import torch 
+import torch.nn as nn +import torch +import torch.optim as optim +from torch.utils.data import DataLoader +from torchvision import transforms +from Model import LaneSegmentationModel +from Dataset import TuSimpleDataset +from utils import visualize_batch + +dataset = TuSimpleDataset( + json_paths=[ + # "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0313.json", + # "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0531.json", + "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0601.json" + ], + img_dir="/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set", +) + +device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") +model = LaneSegmentationModel().to(device) + +dataloader = DataLoader( + dataset, + batch_size=16, + shuffle=True +) + +criterion = nn.BCEWithLogitsLoss() +learning_rate = 0.001 +optimizer = optim.Adam(model.parameters(), lr=learning_rate) + +if __name__ == "__main__": + total_samples = len(dataset) + print(f"Total training samples: {total_samples}") + + num_epochs = 1 + batch_size = 16 + steps_per_epoch = total_samples // batch_size + print(f"Steps per epoch: {steps_per_epoch}") + + best_loss = float('inf') + patience = 5 + patience_counter = 0 + + for epoch in range(num_epochs): + model.train() + total_loss = 0 + + for batch_idx, (images, masks) in enumerate(dataloader): + images = images.to(device) + masks = masks.to(device) + + # Forward pass + outputs = model(images) + loss = criterion(outputs, masks) + + # Backward pass + optimizer.zero_grad() + loss.backward() + optimizer.step() + + total_loss += loss.item() + + if (batch_idx + 1) % 5 == 0: + visualize_batch(images, masks, outputs) + print(f"Epoch [{epoch+1}/{num_epochs}], " + f"Step [{batch_idx+1}/{steps_per_epoch}], " + f"Loss: 
{loss.item():.4f}") + + avg_loss = total_loss / steps_per_epoch + print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}") + + print("Training finished!") + torch.save(model.state_dict(), "lane_segmentation.pth") \ No newline at end of file diff --git a/src/OpenCV/utils.py b/src/OpenCV/utils.py new file mode 100644 index 0000000..de63959 --- /dev/null +++ b/src/OpenCV/utils.py @@ -0,0 +1,74 @@ +import torch +import matplotlib.pyplot as plt +import numpy as np + +def visualize_batch(images, masks=None, outputs=None): + img = images[0].cpu().permute(1, 2, 0).numpy() + + # Denormalize image + mean = np.array([0.485, 0.456, 0.406]) + std = np.array([0.229, 0.224, 0.225]) + img = std * img + mean + img = np.clip(img, 0, 1) + + # Create figure + plt.figure(figsize=(15, 5)) + + # Plot original image + plt.subplot(131) + plt.imshow(img) + plt.title('Input Image') + plt.axis('off') + + # Plot ground truth mask (single channel) + if masks is not None: + mask = masks[0].cpu().numpy() + plt.subplot(132) + plt.imshow(mask[0], cmap='gray') + plt.title('Ground Truth') + plt.axis('off') + + # Plot prediction if available + if outputs is not None: + pred = torch.sigmoid(outputs[0]).cpu().detach().numpy() + plt.subplot(133) + plt.imshow(pred[0], cmap='jet') + plt.colorbar() + plt.title('Prediction') + plt.axis('off') + + plt.tight_layout() + plt.show() + plt.pause(0.1) + + +def visualize_output_batch(image, outputs=None): + img = image[0].cpu().permute(1, 2, 0).numpy() + + # Denormalize image + mean = np.array([0.485, 0.456, 0.406]) + std = np.array([0.229, 0.224, 0.225]) + img = std * img + mean + img = np.clip(img, 0, 1) + + # Create figure + plt.figure(figsize=(15, 5)) + + # Plot original image + plt.subplot(131) + plt.imshow(img) + plt.title('Input Image') + plt.axis('off') + + # Plot prediction if available + if outputs is not None: + pred = torch.sigmoid(outputs[0]).cpu().detach().numpy() # Changed to sigmoid for single channel + plt.subplot(132) + plt.imshow(pred[0], 
cmap='jet') + plt.colorbar() + plt.title('Prediction') + plt.axis('off') + + plt.tight_layout() + plt.show() + plt.pause(0.1) \ No newline at end of file From 065b5227d9d21be8e7f20a4c1b64668eaa4be093 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Thu, 20 Feb 2025 14:25:48 +0000 Subject: [PATCH 02/14] Update .gitignore to include additional asset and cache files --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0b3abcd..e41c0ba 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,7 @@ install_manifest.txt compile_commands.json CTestTestfile.cmake _deps -CMakeUserPresets.json \ No newline at end of file +CMakeUserPresets.json +src/OpenCV/assets/* +src/OpenCV/__pycache__/* +lane_segmentation.pth \ No newline at end of file From d2fb867f7ff33dffc428950eab4cba13697ee83a Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Thu, 20 Feb 2025 15:38:48 +0000 Subject: [PATCH 03/14] Refactor lane segmentation code: streamline dataset initialization, enhance image processing, and improve inference loop --- src/OpenCV/Dataset.py | 6 +++--- src/OpenCV/main.py | 45 +++++++++++++++++++++++++++--------------- src/OpenCV/training.py | 35 +++++++------------------------- src/OpenCV/utils.py | 2 -- 4 files changed, 39 insertions(+), 49 deletions(-) diff --git a/src/OpenCV/Dataset.py b/src/OpenCV/Dataset.py index 3479144..ee907b3 100644 --- a/src/OpenCV/Dataset.py +++ b/src/OpenCV/Dataset.py @@ -15,6 +15,7 @@ def get_binary_labels(height, width, pts, thickness=5): isClosed=False, color=1, thickness=thickness) + return bin_img.astype(np.float32)[None, ...] 
def get_image_transform(): @@ -29,16 +30,14 @@ def get_image_transform(): class TuSimpleDataset(Dataset): def __init__(self, json_paths, img_dir, width=512, height=256, - thickness=5, max_lanes=4): + thickness=5): self.samples = [] self.width = width self.height = height self.thickness = thickness - self.max_lanes = max_lanes self.img_dir = img_dir self.transform = get_image_transform() - # Load all samples from JSON files for json_path in json_paths: with open(json_path, 'r') as f: for line in f: @@ -55,6 +54,7 @@ def __getitem__(self, idx): image = cv2.imread(file_path) if image is None: raise ValueError(f"Could not read image: {file_path}") + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) width_org = image.shape[1] height_org = image.shape[0] diff --git a/src/OpenCV/main.py b/src/OpenCV/main.py index 9825dfc..690480a 100644 --- a/src/OpenCV/main.py +++ b/src/OpenCV/main.py @@ -5,6 +5,8 @@ from Model import LaneSegmentationModel from Dataset import get_image_transform from utils import visualize_output_batch +from PIL import Image +from torchvision import transforms # Initialize model device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") @@ -13,25 +15,36 @@ model.load_state_dict(torch.load('lane_segmentation.pth', map_location=device)) model.eval() -image_dir = "assets/images" # Update this to your images directory -image_paths = glob.glob(f"{image_dir}/*.jpg") + glob.glob(f"{image_dir}/*.png") +cap = cv2.VideoCapture("assets/road.mp4") -for img_path in image_paths: - # Read image - frame = cv2.imread(img_path) - if frame is None: - print(f"Could not read image: {img_path}") - continue - - height, width, _ = frame.shape +while True: + ret, frame = cap.read() + if not ret: + break - image = cv2.resize(frame, (256, 512)) - transform = get_image_transform() - image = transform(image).unsqueeze(0).to(device) + rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + transforms = get_image_transform() + input_tensor = 
transforms(rgb_frame).unsqueeze(0).to(device) + + # Run inference. with torch.no_grad(): - outputs = model(image) - - visualize_output_batch(image, outputs) + output = model(input_tensor) + + output_mask = output.squeeze().cpu().numpy() + binary_mask = (output_mask > 0.5).astype(np.uint8) * 255 + + # Resize the mask to the original frame dimensions. + mask_resized = cv2.resize(binary_mask, (frame.shape[1], frame.shape[0])) + + # Create a copy of the original frame. + blended = frame.copy() + # Replace pixels where the mask is non-zero with green (BGR: [0,255,0]). + blended[mask_resized > 0] = [0, 255, 0] + + # Display the result. + cv2.imshow("Lane Detection", blended) + if cv2.waitKey(1) & 0xFF == ord('q'): + break cv2.destroyAllWindows() \ No newline at end of file diff --git a/src/OpenCV/training.py b/src/OpenCV/training.py index 05282bb..e0fb3b3 100644 --- a/src/OpenCV/training.py +++ b/src/OpenCV/training.py @@ -10,15 +10,12 @@ dataset = TuSimpleDataset( json_paths=[ - # "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0313.json", - # "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0531.json", + "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0313.json", + "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0531.json", "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0601.json" ], img_dir="/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set", ) - -device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") -model = LaneSegmentationModel().to(device) dataloader = DataLoader( dataset, @@ -26,50 +23,32 @@ shuffle=True ) +device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") 
+model = LaneSegmentationModel().to(device) + criterion = nn.BCEWithLogitsLoss() -learning_rate = 0.001 -optimizer = optim.Adam(model.parameters(), lr=learning_rate) +optimizer = optim.Adam(model.parameters(), lr=0.001) if __name__ == "__main__": - total_samples = len(dataset) - print(f"Total training samples: {total_samples}") - num_epochs = 1 batch_size = 16 - steps_per_epoch = total_samples // batch_size - print(f"Steps per epoch: {steps_per_epoch}") - - best_loss = float('inf') - patience = 5 - patience_counter = 0 for epoch in range(num_epochs): model.train() - total_loss = 0 - for batch_idx, (images, masks) in enumerate(dataloader): images = images.to(device) masks = masks.to(device) - # Forward pass + optimizer.zero_grad() outputs = model(images) loss = criterion(outputs, masks) - - # Backward pass - optimizer.zero_grad() loss.backward() optimizer.step() - - total_loss += loss.item() if (batch_idx + 1) % 5 == 0: visualize_batch(images, masks, outputs) print(f"Epoch [{epoch+1}/{num_epochs}], " - f"Step [{batch_idx+1}/{steps_per_epoch}], " f"Loss: {loss.item():.4f}") - avg_loss = total_loss / steps_per_epoch - print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}") - print("Training finished!") torch.save(model.state_dict(), "lane_segmentation.pth") \ No newline at end of file diff --git a/src/OpenCV/utils.py b/src/OpenCV/utils.py index de63959..4cd493a 100644 --- a/src/OpenCV/utils.py +++ b/src/OpenCV/utils.py @@ -33,7 +33,6 @@ def visualize_batch(images, masks=None, outputs=None): pred = torch.sigmoid(outputs[0]).cpu().detach().numpy() plt.subplot(133) plt.imshow(pred[0], cmap='jet') - plt.colorbar() plt.title('Prediction') plt.axis('off') @@ -65,7 +64,6 @@ def visualize_output_batch(image, outputs=None): pred = torch.sigmoid(outputs[0]).cpu().detach().numpy() # Changed to sigmoid for single channel plt.subplot(132) plt.imshow(pred[0], cmap='jet') - plt.colorbar() plt.title('Prediction') plt.axis('off') From ff45cd859490a73556e41519fdff45f0518e60bb Mon Sep 
17 00:00:00 2001 From: Luis Carvalho Date: Thu, 20 Feb 2025 15:53:59 +0000 Subject: [PATCH 04/14] Add dataset and model implementation for lane segmentation; update .gitignore and requirements --- .gitignore | 1 + requirements.txt | 7 +++++++ src/{OpenCV => }/Dataset.py | 0 src/{OpenCV => }/Model.py | 0 src/{OpenCV => }/main.py | 12 ++++++++++-- src/{OpenCV => }/training.py | 27 ++++++++++++++++++--------- src/{OpenCV => }/utils.py | 0 7 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 requirements.txt rename src/{OpenCV => }/Dataset.py (100%) rename src/{OpenCV => }/Model.py (100%) rename src/{OpenCV => }/main.py (77%) rename src/{OpenCV => }/training.py (58%) rename src/{OpenCV => }/utils.py (100%) diff --git a/.gitignore b/.gitignore index e41c0ba..fe5b0b1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,4 +46,5 @@ _deps CMakeUserPresets.json src/OpenCV/assets/* src/OpenCV/__pycache__/* +env/* lane_segmentation.pth \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8d10435 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +torch +torchvision +timm +matplotlib +pandas +numpy +opencv-python \ No newline at end of file diff --git a/src/OpenCV/Dataset.py b/src/Dataset.py similarity index 100% rename from src/OpenCV/Dataset.py rename to src/Dataset.py diff --git a/src/OpenCV/Model.py b/src/Model.py similarity index 100% rename from src/OpenCV/Model.py rename to src/Model.py diff --git a/src/OpenCV/main.py b/src/main.py similarity index 77% rename from src/OpenCV/main.py rename to src/main.py index 690480a..6002efd 100644 --- a/src/OpenCV/main.py +++ b/src/main.py @@ -9,8 +9,16 @@ from torchvision import transforms # Initialize model -device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") -print(f"Using device: {device}") +if torch.cuda.is_available(): + device = torch.device("cuda") + torch.backends.cudnn.benchmark = True # Optimize CUDA performance + 
print(f"Using CUDA device: {torch.cuda.get_device_name()}") +elif torch.backends.mps.is_available(): # For Apple Silicon + device = torch.device("mps") + print("Using MPS (Metal Performance Shaders)") +else: + device = torch.device("cpu") + print("Using CPU") model = LaneSegmentationModel().to(device) model.load_state_dict(torch.load('lane_segmentation.pth', map_location=device)) model.eval() diff --git a/src/OpenCV/training.py b/src/training.py similarity index 58% rename from src/OpenCV/training.py rename to src/training.py index e0fb3b3..71b05b5 100644 --- a/src/OpenCV/training.py +++ b/src/training.py @@ -10,28 +10,37 @@ dataset = TuSimpleDataset( json_paths=[ - "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0313.json", - "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0531.json", - "/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set/label_data_0601.json" + "/home/luis_t2/OpenCV/assets/TUSimple/train_set/label_data_0313.json", + "/home/luis_t2/OpenCV/assets/TUSimple/train_set/label_data_0531.json", + "/home/luis_t2/OpenCV/assets/TUSimple/train_set/label_data_0601.json" ], - img_dir="/Users/ruipedropires/.cache/kagglehub/datasets/manideep1108/tusimple/versions/5/TUSimple/train_set", + img_dir="/home/luis_t2/OpenCV/assets/TUSimple/train_set/", ) dataloader = DataLoader( dataset, - batch_size=16, + batch_size=8, shuffle=True ) -device = torch.device("mps" if torch.backends.mps.is_available() else "cpu") +if torch.cuda.is_available(): + device = torch.device("cuda") + torch.backends.cudnn.benchmark = True # Optimize CUDA performance + print(f"Using CUDA device: {torch.cuda.get_device_name()}") +elif torch.backends.mps.is_available(): # For Apple Silicon + device = torch.device("mps") + print("Using MPS (Metal Performance Shaders)") +else: + device = torch.device("cpu") + print("Using CPU") 
model = LaneSegmentationModel().to(device) criterion = nn.BCEWithLogitsLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) if __name__ == "__main__": - num_epochs = 1 - batch_size = 16 + num_epochs = 4 + batch_size = 8 for epoch in range(num_epochs): model.train() @@ -46,7 +55,7 @@ optimizer.step() if (batch_idx + 1) % 5 == 0: - visualize_batch(images, masks, outputs) + # visualize_batch(images, masks, outputs) print(f"Epoch [{epoch+1}/{num_epochs}], " f"Loss: {loss.item():.4f}") diff --git a/src/OpenCV/utils.py b/src/utils.py similarity index 100% rename from src/OpenCV/utils.py rename to src/utils.py From 28615fc3644bda0019d5274db83641f6f5608b84 Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Thu, 20 Feb 2025 15:54:18 +0000 Subject: [PATCH 05/14] Update .gitignore to generalize asset and cache file paths; remove empty keep files from tests --- .gitignore | 4 ++-- tests/logic/.keep | 0 tests/ui/.keep | 0 3 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 tests/logic/.keep delete mode 100644 tests/ui/.keep diff --git a/.gitignore b/.gitignore index fe5b0b1..307123f 100644 --- a/.gitignore +++ b/.gitignore @@ -44,7 +44,7 @@ compile_commands.json CTestTestfile.cmake _deps CMakeUserPresets.json -src/OpenCV/assets/* -src/OpenCV/__pycache__/* +src/assets/* +src/__pycache__/* env/* lane_segmentation.pth \ No newline at end of file diff --git a/tests/logic/.keep b/tests/logic/.keep deleted file mode 100644 index e69de29..0000000 diff --git a/tests/ui/.keep b/tests/ui/.keep deleted file mode 100644 index e69de29..0000000 From 012efc98310ef4c38296f2daa484e4b0a2920dd5 Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Thu, 20 Feb 2025 16:18:21 +0000 Subject: [PATCH 06/14] Increase the number of training epochs from 4 to 20 for improved model performance --- src/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training.py b/src/training.py index 71b05b5..5b51178 100644 --- a/src/training.py +++ b/src/training.py @@ 
-39,7 +39,7 @@ optimizer = optim.Adam(model.parameters(), lr=0.001) if __name__ == "__main__": - num_epochs = 4 + num_epochs = 20 batch_size = 8 for epoch in range(num_epochs): From 41d822fdd3712bdeb7ee5fe8d8732bbc93a02e6d Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Thu, 20 Feb 2025 16:21:15 +0000 Subject: [PATCH 07/14] Update video source path in main.py to use road1.mp4 --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 6002efd..7eeed50 100644 --- a/src/main.py +++ b/src/main.py @@ -23,7 +23,7 @@ model.load_state_dict(torch.load('lane_segmentation.pth', map_location=device)) model.eval() -cap = cv2.VideoCapture("assets/road.mp4") +cap = cv2.VideoCapture("assets/road1.mp4") while True: ret, frame = cap.read() From 0bd0c72a32643505ca4b0e6300595fb5a57d8a48 Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Thu, 20 Feb 2025 16:46:48 +0000 Subject: [PATCH 08/14] Adjust binary mask threshold and enhance mask processing for lane detection --- src/main.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/main.py b/src/main.py index 7eeed50..d88fb7d 100644 --- a/src/main.py +++ b/src/main.py @@ -40,15 +40,20 @@ output = model(input_tensor) output_mask = output.squeeze().cpu().numpy() - binary_mask = (output_mask > 0.5).astype(np.uint8) * 255 + binary_mask = (output_mask > 0.6).astype(np.uint8) * 255 + + kernel = np.ones((3,3), np.uint8) + binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel) # Remove noise + binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel) # Resize the mask to the original frame dimensions. mask_resized = cv2.resize(binary_mask, (frame.shape[1], frame.shape[0])) # Create a copy of the original frame. - blended = frame.copy() - # Replace pixels where the mask is non-zero with green (BGR: [0,255,0]). 
- blended[mask_resized > 0] = [0, 255, 0] + overlay = frame.copy() + overlay[mask_resized > 0] = [0, 255, 0] + alpha = 0.4 # Transparency factor + blended = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0) # Display the result. cv2.imshow("Lane Detection", blended) From 846b69678586182dc4c24fd674234dce1a0850a8 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Fri, 21 Feb 2025 11:07:38 +0000 Subject: [PATCH 09/14] Implement model checkpointing and average loss calculation during training --- src/training.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/training.py b/src/training.py index 5b51178..e963592 100644 --- a/src/training.py +++ b/src/training.py @@ -41,9 +41,14 @@ if __name__ == "__main__": num_epochs = 20 batch_size = 8 + best_loss = float('inf') + best_model_path = "best_lane_segmentation.pth" for epoch in range(num_epochs): model.train() + epoch_loss = 0.0 + batch_count = 0 + for batch_idx, (images, masks) in enumerate(dataloader): images = images.to(device) masks = masks.to(device) @@ -53,11 +58,29 @@ loss = criterion(outputs, masks) loss.backward() optimizer.step() + + epoch_loss += loss.item() + batch_count += 1 if (batch_idx + 1) % 5 == 0: # visualize_batch(images, masks, outputs) print(f"Epoch [{epoch+1}/{num_epochs}], " f"Loss: {loss.item():.4f}") + # Calculate average loss for the epoch + avg_epoch_loss = epoch_loss / batch_count + print(f"Epoch [{epoch+1}/{num_epochs}] Average Loss: {avg_epoch_loss:.4f}") + + # Save the model if it has the best loss so far + if avg_epoch_loss < best_loss: + best_loss = avg_epoch_loss + torch.save({ + 'epoch': epoch, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'loss': best_loss, + }, best_model_path) + print(f"Saved new best model with loss: {best_loss:.4f}") + print("Training finished!") - torch.save(model.state_dict(), "lane_segmentation.pth") \ No newline at end of file + print(f"Best model saved with loss: 
{best_loss:.4f}") \ No newline at end of file From 2bd4fb94c7bdc80f1897dabfa6e46aabae72b950 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Fri, 21 Feb 2025 12:15:40 +0000 Subject: [PATCH 10/14] Load model state from checkpoint for improved lane segmentation --- src/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index d88fb7d..e50c482 100644 --- a/src/main.py +++ b/src/main.py @@ -20,8 +20,8 @@ device = torch.device("cpu") print("Using CPU") model = LaneSegmentationModel().to(device) -model.load_state_dict(torch.load('lane_segmentation.pth', map_location=device)) -model.eval() +checkpoint = torch.load("best_lane_segmentation.pth") +model.load_state_dict(checkpoint['model_state_dict']) cap = cv2.VideoCapture("assets/road1.mp4") From 5debc0a1f0ff1e441943e96ac2449f172a24b7ed Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Fri, 21 Feb 2025 12:16:39 +0000 Subject: [PATCH 11/14] Add best_lane_segmentation.pth to .gitignore to prevent tracking of model weights --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 307123f..eafeb72 100644 --- a/.gitignore +++ b/.gitignore @@ -46,5 +46,6 @@ _deps CMakeUserPresets.json src/assets/* src/__pycache__/* +src/best_lane_segmentation.pth env/* lane_segmentation.pth \ No newline at end of file From 6e975ad48106263711882dd6f673434213874d11 Mon Sep 17 00:00:00 2001 From: Luis Carvalho Date: Fri, 21 Feb 2025 12:22:39 +0000 Subject: [PATCH 12/14] Update video source path to road3.mp4 and adjust mask processing parameters --- src/main.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main.py b/src/main.py index e50c482..8f2e839 100644 --- a/src/main.py +++ b/src/main.py @@ -22,8 +22,9 @@ model = LaneSegmentationModel().to(device) checkpoint = torch.load("best_lane_segmentation.pth") model.load_state_dict(checkpoint['model_state_dict']) +model.eval() -cap = cv2.VideoCapture("assets/road1.mp4") +cap = 
cv2.VideoCapture("assets/road3.mp4") while True: ret, frame = cap.read() @@ -42,7 +43,7 @@ output_mask = output.squeeze().cpu().numpy() binary_mask = (output_mask > 0.6).astype(np.uint8) * 255 - kernel = np.ones((3,3), np.uint8) + kernel = np.ones((4,4), np.uint8) binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel) # Remove noise binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel) @@ -52,7 +53,7 @@ # Create a copy of the original frame. overlay = frame.copy() overlay[mask_resized > 0] = [0, 255, 0] - alpha = 0.4 # Transparency factor + alpha = 1 # Transparency factor blended = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0) # Display the result. From 3cd9b66fa531a0b7f1c2868abf70665d9fd1e959 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Fri, 21 Feb 2025 14:42:44 +0000 Subject: [PATCH 13/14] Implement post-processing class for binary mask refinement and update video source path --- src/main.py | 61 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/src/main.py b/src/main.py index 8f2e839..796f41b 100644 --- a/src/main.py +++ b/src/main.py @@ -8,6 +8,52 @@ from PIL import Image from torchvision import transforms + +class PostProcessor(object): + + def __init__(self): + pass + + def process(self, image, kernel_size=5, minarea_threshold=10): + """Do the post processing here. First the image is converted to grayscale. + Then a closing operation is applied to fill empty gaps among surrounding + pixels. After that connected components are detected where small components + will be removed. 
+ + Args: + image: + kernel_size + minarea_threshold + + Returns: + image: binary image + + """ + if image.dtype is not np.uint8: + image = np.array(image, np.uint8) + if len(image.shape) == 3: + image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + + # fill the pixel gap using Closing operator (dilation followed by + # erosion) + kernel = cv2.getStructuringElement( + shape=cv2.MORPH_RECT, ksize=( + kernel_size, kernel_size)) + image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel) + + ccs = cv2.connectedComponentsWithStats( + image, connectivity=8, ltype=cv2.CV_32S) + labels = ccs[1] + stats = ccs[2] + + for index, stat in enumerate(stats): + if stat[4] <= minarea_threshold: + idx = np.where(labels == index) + image[idx] = 0 + + return image + + # Initialize model if torch.cuda.is_available(): device = torch.device("cuda") @@ -24,7 +70,8 @@ model.load_state_dict(checkpoint['model_state_dict']) model.eval() -cap = cv2.VideoCapture("assets/road3.mp4") +cap = cv2.VideoCapture("assets/road1.mp4") +post_processor = PostProcessor() while True: ret, frame = cap.read() @@ -43,17 +90,19 @@ output_mask = output.squeeze().cpu().numpy() binary_mask = (output_mask > 0.6).astype(np.uint8) * 255 - kernel = np.ones((4,4), np.uint8) - binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel) # Remove noise - binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel) + processed_mask = post_processor.process( + binary_mask, + kernel_size=5, + minarea_threshold=10 + ) # Resize the mask to the original frame dimensions. - mask_resized = cv2.resize(binary_mask, (frame.shape[1], frame.shape[0])) + mask_resized = cv2.resize(processed_mask, (frame.shape[1], frame.shape[0])) # Create a copy of the original frame. overlay = frame.copy() overlay[mask_resized > 0] = [0, 255, 0] - alpha = 1 # Transparency factor + alpha = 0.4 # Transparency factor blended = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0) # Display the result. 
From 3498104a58d83e890ce6dd1da16026727b394127 Mon Sep 17 00:00:00 2001 From: Rui Pires Date: Fri, 21 Feb 2025 15:16:44 +0000 Subject: [PATCH 14/14] Export lane segmentation model to ONNX format for improved interoperability --- src/main.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/main.py b/src/main.py index 796f41b..8ca134e 100644 --- a/src/main.py +++ b/src/main.py @@ -70,6 +70,23 @@ def process(self, image, kernel_size=5, minarea_threshold=10): model.load_state_dict(checkpoint['model_state_dict']) model.eval() +dummy_input = torch.randn(1, 3, 256, 256).to(device) +torch.onnx.export( + model, + dummy_input, + "lane_segmentation.onnx", + export_params=True, + opset_version=11, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + dynamic_axes={ + 'input': {0: 'batch_size'}, + 'output': {0: 'batch_size'} + } +) +print("Model exported to ONNX format!") + cap = cv2.VideoCapture("assets/road1.mp4") post_processor = PostProcessor()