diff --git a/1_detection_to_csv.ipynb b/1_detection_to_csv.ipynb new file mode 100644 index 0000000..bc1f1de --- /dev/null +++ b/1_detection_to_csv.ipynb @@ -0,0 +1,141273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "id": "6dbcbec9-adfc-45f8-adc8-61272a0b90e4", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm.auto import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "23394936-7e2a-4b62-b65c-abe7bfc30cbc", + "metadata": {}, + "outputs": [], + "source": [ + "#os.chdir(r'C:\\Users\\ashams\\Box\\MnDOT DNRTOR Project\\Meenakshi\\detect')\n", + "#os.chdir(r'C:\\Users\\marya1\\Box\\MnDOT DNRTOR Project\\Meenakshi\\detect')\n", + "os.chdir(r'/home/marya1/Documents/MnDoTNRToR/detect/processing')" + ] + }, + { + "cell_type": "markdown", + "id": "ae5b4ab6", + "metadata": {}, + "source": [ + "### In case the column names are not separated by ; ###" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6afe2013", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6.15_PM_NTOR\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e7d9eed57d22422b9fe40e1b4863a463", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/141113 [00:00= 7: # Ensure all parts are present\n", + " frame, track, cls, cls_id, xmin, ymin, xmax, ymax = [part.split(':')[1].strip() for part in parts[:8]]\n", + " rows_to_add.append({\n", + " \"Frame\": int(frame),\n", + " \"Track\": int(track),\n", + " \"Class\": str(cls),\n", + " \"Class_ID\": int(float(cls_id)),\n", + " \"xmin\": int(xmin),\n", + " \"ymin\": int(ymin),\n", + " \"xmax\": int(xmax),\n", + " \"ymax\": int(ymax)\n", + " })\n", + " \n", + " # If there are rows to add, concatenate them to the main DataFrame\n", + " if rows_to_add:\n", + " new_rows_df = pd.DataFrame(rows_to_add)\n", + " output_csv = pd.concat([output_csv, new_rows_df], ignore_index=True)\n", + " \n", + " output_csv['Datetime'] = folder\n", + " \n", + " new_file_path = os.path.join(directory_path, folder + \".csv\")\n", + " output_csv.sort_values(by=[\"Frame\", \"Track\"]).to_csv(new_file_path, index=False)\n", + " print(\"Processed and saved:\", new_file_path)\n", + "\n", + "print(\"All files have been processed.\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0feb9f3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/1_detection_to_csv_wbb.ipynb b/1_detection_to_csv_wbb.ipynb new file mode 100644 index 0000000..292b0b2 --- /dev/null +++ b/1_detection_to_csv_wbb.ipynb @@ -0,0 +1,245759 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os, sys\n", + "import glob\n", + "import pandas as pd\n", + "import numpy as np\n", + "from tqdm.auto import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + 
"#os.chdir(r'C:\\Users\\ashams\\Box\\MnDOT DNRTOR Project\\Meenakshi\\detect')\n", + "#os.chdir(r'C:\\Users\\marya1\\Box\\MnDOT DNRTOR Project\\Meenakshi\\detect')\n", + "#os.chdir(r'/home/marya/Desktop/zero-shot-object-tracking/runs/detect/processing')\n", + "os.chdir(r'/home/marya1/Documents/MnDoTNRToR/detect/processing')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.29_AM_NTOR\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "95c1b2b13c3f464a888594efdeceee60", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/245611 [00:00= 7: # Ensure all parts are present\n", + " frame, track, cls, cls_id, xmin, ymin, xmax, ymax = [part.split(':')[1].strip() for part in parts[:8]]\n", + " rows_to_add.append({\n", + " \"Frame\": int(frame),\n", + " \"Track\": int(track),\n", + " \"Class\": str(cls),\n", + " \"Class_ID\": int(float(cls_id)),\n", + " \"xmin\": int(xmin),\n", + " \"ymin\": int(ymin),\n", + " \"xmax\": int(xmax),\n", + " \"ymax\": int(ymax)\n", + " })\n", + " \n", + " # If there are rows to add, concatenate them to the main DataFrame\n", + " if rows_to_add:\n", + " new_rows_df = pd.DataFrame(rows_to_add)\n", + " output_csv = pd.concat([output_csv, new_rows_df], ignore_index=True)\n", + " \n", + " output_csv['Datetime'] = folder\n", + " \n", + " new_file_path = os.path.join(directory_path, folder + \".csv\")\n", + " output_csv.sort_values(by=[\"Frame\", \"Track\"]).to_csv(new_file_path, index=False)\n", + " print(\"Processed and saved:\", new_file_path)\n", + "\n", + "print(\"All files have been processed.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/2_detect_red_all.py b/2_detect_red_all.py new file mode 100644 index 0000000..26cb028 --- /dev/null +++ b/2_detect_red_all.py @@ -0,0 +1,101 @@ +import cv2 +import os +import pandas as pd +import numpy as np +from os.path import splitext, basename, join +from datetime import datetime, timedelta +from glob import glob + +# Function to detect red light in the ROI +def detect_red_light(image): + lower_red = np.array([0, 120, 70]) + upper_red = np.array([10, 255, 255]) + hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + mask = cv2.inRange(hsv, lower_red, upper_red) + return cv2.countNonZero(mask) > 0 + +# Function to manually draw the ROI +def draw_roi(event, x, y, flags, param): + global traffic_light_box, drawing, top_left_pt, bottom_right_pt, roi_selected + if event == cv2.EVENT_LBUTTONDOWN: + drawing = True + top_left_pt = (x, y) + elif event == cv2.EVENT_LBUTTONUP: + drawing = False + bottom_right_pt = (x, y) + roi_selected = True + +# Paths +videos_path = 'C:/Users/marya1/Box/MnDOT DNRTOR Project/Meenakshi/videos' +output_path = 'C:/Users/marya1/Box/MnDOT DNRTOR Project/Meenakshi/detect_red' +#videos_path = 'C:/Users/ASUS/Box/MnDOT DNRTOR Project/Pratik' +#output_path = 'C:/Users/ASUS/Box/MnDOT DNRTOR Project/Pratik/output' +#videos_path = 'E:/MnDOT/Videos' 
+#output_path = 'E:/MnDOT/Videos/output' + + +# List all video files +video_files = glob(join(videos_path, '*.mp4')) + +for video_path in video_files: + # Initialize variables for each video + traffic_light_box = None + roi_selected = False + + # Display video and select ROI + cap = cv2.VideoCapture(video_path) + ret, first_frame = cap.read() + if not ret: + print("Failed to read video:", video_path) + cap.release() + continue # Skip to the next video + + # Input for date and time + video_date = input(f"Enter the video start date (yyyy-mm-dd) for {basename(video_path)}: ") + video_start_time = input(f"Enter the video start time (hh-mm-ss) for {basename(video_path)}: ") + start_datetime = datetime.strptime(f"{video_date} {video_start_time}", "%Y-%m-%d %H-%M-%S") + + cv2.namedWindow('Frame') + cv2.setMouseCallback('Frame', draw_roi) + + print("Draw a bounding box around the traffic light and press 'Enter'.") + while True: + frame_copy = first_frame.copy() + if roi_selected: + cv2.rectangle(frame_copy, top_left_pt, bottom_right_pt, (0, 255, 0), 2) + cv2.imshow('Frame', frame_copy) + key = cv2.waitKey(1) + if key == 13: # Enter key + traffic_light_box = (top_left_pt[0], top_left_pt[1], bottom_right_pt[0] - top_left_pt[0], bottom_right_pt[1] - top_left_pt[1]) + break + + cv2.destroyAllWindows() + + df = pd.DataFrame(columns=['Frame_Number', 'Date', 'Time', 'Red_Light']) + frame_counter = 0 + fps = cap.get(cv2.CAP_PROP_FPS) + + while cap.isOpened(): + ret, frame = cap.read() + if not ret: + break + current_time = start_datetime + timedelta(seconds=frame_counter / fps) + date_str = current_time.strftime("%Y-%m-%d") + time_str = current_time.strftime("%H:%M:%S") + + traffic_light_roi = frame[traffic_light_box[1]:traffic_light_box[1]+traffic_light_box[3], + traffic_light_box[0]:traffic_light_box[0]+traffic_light_box[2]] + red_light_detected = detect_red_light(traffic_light_roi) + + new_row = {'Frame_Number': frame_counter, 'Date': date_str, 'Time': time_str, 'Red_Light': red_light_detected} + df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) + + frame_counter += 1 + + cap.release() + + # Save the DataFrame to a CSV file + csv_file_name = splitext(basename(video_path))[0] + '.csv' + csv_path = join(output_path, csv_file_name) + df.to_csv(csv_path, index=False) + print(f"CSV saved at {csv_path}") diff --git a/3_merge_detection_red.py b/3_merge_detection_red.py new file mode 100644 index 0000000..fe77b13 --- /dev/null +++ b/3_merge_detection_red.py @@ -0,0 +1,55 @@ +import pandas as pd +import os +from glob import glob + +def merge_csv_with_timestamps(output_csv_dir, timestamps_csv_dir, merged_csv_dir): + # List all CSV files in the output CSV directory + output_csv_files = glob(os.path.join(output_csv_dir, '*.csv')) + + for output_csv_path in output_csv_files: + # Derive the base file name without extension to match with timestamp file + base_file_name = os.path.basename(output_csv_path) + + # Construct the expected timestamp CSV file path + timestamps_csv_path = os.path.join(timestamps_csv_dir, base_file_name) + + # Check if the corresponding timestamps CSV exists + if not os.path.exists(timestamps_csv_path): + print(f"No matching timestamp file found for {base_file_name}. 
Skipping...") + continue + + # Load the CSV files + output_df = pd.read_csv(output_csv_path) + timestamps_df = pd.read_csv(timestamps_csv_path) + + # Merge the two dataframes on the Frame and Frame_Number columns + merged_df = pd.merge(output_df, timestamps_df, left_on='Frame', right_on='Frame_Number', how='left') + + # Including all relevant columns and renaming + #final_df = merged_df[['Datetime', 'Frame', 'Track', 'Class', 'BBox', 'Time', 'Date', 'Red_Light']].copy() + final_df = merged_df[['Datetime', 'Frame', 'Track', 'Class', 'Class_ID','xmin', 'ymin','xmax','ymax', 'Time', 'Date', 'Red_Light']].copy() + final_df.rename(columns={'Frame': 'FrameNumber', 'Red_Light': 'Color'}, inplace=True) + + # Handle 'Color' column + final_df['Color'].fillna('Not Red', inplace=True) + final_df['Color'] = final_df['Color'].apply(lambda x: 'Red' if x == True else 'Not Red') + + # Construct path for saving the merged CSV + merged_csv_path = os.path.join(merged_csv_dir, base_file_name) + + # Save the merged dataframe + final_df.to_csv(merged_csv_path, index=False) + print(f"Merged CSV saved to {merged_csv_path}") + +# Directories containing the CSV files +#output_csv_dir = 'C:/Users/marya1/Box/MnDOT DNRTOR Project/Meenakshi/detect_objects' +#timestamps_csv_dir = 'C:/Users/marya1/Box/MnDOT DNRTOR Project/Meenakshi/detect_red' +#merged_csv_dir = 'C:/Users/marya1/Box/MnDOT DNRTOR Project/Meenakshi/final_csv' + + +output_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/detect_objects/processing' +timestamps_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/detect_red' +merged_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/final_csv' + +# Call the function with the directories +merge_csv_with_timestamps(output_csv_dir, timestamps_csv_dir, merged_csv_dir) diff --git a/3_merge_detection_red2.py b/3_merge_detection_red2.py new file mode 100644 index 0000000..0b366e3 --- /dev/null +++ b/3_merge_detection_red2.py @@ -0,0 +1,39 @@ +import pandas as pd +import os +from glob import glob + +def merge_csv_with_timestamps(output_csv_dir, timestamps_csv_dir, merged_csv_dir): + output_csv_files = glob(os.path.join(output_csv_dir, '*.csv')) + + for output_csv_path in output_csv_files: + base_file_name = os.path.basename(output_csv_path) + timestamps_csv_path = os.path.join(timestamps_csv_dir, base_file_name) + + if not os.path.exists(timestamps_csv_path): + print(f"No matching timestamp file found for {base_file_name}. 
Skipping...") + continue + + try: + output_df = pd.read_csv(output_csv_path, on_bad_lines='warn') + timestamps_df = pd.read_csv(timestamps_csv_path, on_bad_lines='warn') + except Exception as e: + print(f"Error reading {base_file_name}: {e}") + continue + + merged_df = pd.merge(output_df, timestamps_df, left_on='Frame', right_on='Frame_Number', how='left') + final_df = merged_df[['Datetime', 'Frame', 'Track', 'Class', 'Class_ID', 'xmin', 'ymin', 'xmax', 'ymax', 'Time', 'Date', 'Red_Light']].copy() + final_df.rename(columns={'Frame': 'FrameNumber', 'Red_Light': 'Color'}, inplace=True) + final_df['Color'].fillna('Not Red', inplace=True) + final_df['Color'] = final_df['Color'].apply(lambda x: 'Red' if x == True else 'Not Red') + + merged_csv_path = os.path.join(merged_csv_dir, base_file_name) + final_df.to_csv(merged_csv_path, index=False) + print(f"Merged CSV saved to {merged_csv_path}") + + +output_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/detect_objects/processing' +timestamps_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/detect_red' +merged_csv_dir = '/home/marya1/Documents/MnDoTNRToR/inference/final_csv' + +# Use the directories as defined in the script +merge_csv_with_timestamps(output_csv_dir, timestamps_csv_dir, merged_csv_dir) diff --git a/4_detect_violations_confirmation.py b/4_detect_violations_confirmation.py new file mode 100644 index 0000000..6109947 --- /dev/null +++ b/4_detect_violations_confirmation.py @@ -0,0 +1,126 @@ +import cv2 +import pandas as pd +from os.path import join, basename, splitext +from glob import glob + +# Define global variables for ROI drawing +drawing = False +roi_selected = False +top_left_pt, bottom_right_pt = None, None + +def draw_roi(event, x, y, flags, param): + global drawing, roi_selected, top_left_pt, bottom_right_pt + if event == cv2.EVENT_LBUTTONDOWN: + drawing = True + top_left_pt = (x, y) + elif event == cv2.EVENT_LBUTTONUP: + drawing = False + bottom_right_pt = (x, y) + roi_selected = True + +def is_within_roi(xmin, ymin, xmax, ymax, roi): + roi_xmin, roi_ymin, roi_xmax, roi_ymax = roi + return xmin >= roi_xmin and xmax <= roi_xmax and ymin >= roi_ymin and ymax <= roi_ymax + + +def count_vehicles_and_violations(csv_file, roi, valid_classes=[1, 2, 3, 5, 7], min_frames=60): + df = pd.read_csv(csv_file) + # Assuming 'Date' column is in 'YYYY-MM-DD' format and 'Time' in 'HH:MM:SS' format + df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], errors='coerce') + + red_light_phases = df[df['Color'] == 'Red'].groupby((df['Color'] != df['Color'].shift()).cumsum()) + results = [] + + for _, phase in red_light_phases: + start_of_red_light = phase['DateTime'].iloc[0] + next_phase_start = phase['DateTime'].iloc[-1] + pd.Timedelta(seconds=1) + + # Skip intervals less than 1 second + if (next_phase_start - start_of_red_light).total_seconds() <= 1: + continue + + tracks_in_roi = df[(df['DateTime'] >= start_of_red_light) & (df['DateTime'] < next_phase_start)] + tracks_in_roi = tracks_in_roi[tracks_in_roi.apply(lambda row: is_within_roi(row['xmin'], row['ymin'], row['xmax'], row['ymax'], roi), axis=1)] + valid_tracks_in_roi = tracks_in_roi[tracks_in_roi['Class_ID'].isin(valid_classes)] + + # Count frames for each track and filter out those with less than min_frames + frame_counts = valid_tracks_in_roi.groupby('Track').size() + tracks_more_than_min_frames = frame_counts[frame_counts >= min_frames].index + valid_tracks_in_roi = valid_tracks_in_roi[valid_tracks_in_roi['Track'].isin(tracks_more_than_min_frames)] + + exiting_tracks 
= valid_tracks_in_roi.groupby('Track').last() + exiting_tracks = exiting_tracks[exiting_tracks.index.isin(tracks_more_than_min_frames)] + violations = exiting_tracks[exiting_tracks['Color'] == 'Red'].index.nunique() + violating_track_ids = exiting_tracks[exiting_tracks['Color'] == 'Red'].index.unique().tolist() + total_vehicles = valid_tracks_in_roi['Track'].nunique() + + total_vehicles_complying = valid_tracks_in_roi['Track'].nunique() - violations + complying_track_ids = valid_tracks_in_roi[~valid_tracks_in_roi['Track'].isin(violating_track_ids)]['Track'].unique().tolist() + + results.append({ + 'Start_of_Red_Light': start_of_red_light.strftime('%Y-%m-%d %H:%M:%S'), + 'Beginning_of_Not_Red': next_phase_start.strftime('%Y-%m-%d %H:%M:%S'), + 'Total_Vehicles': total_vehicles, + 'Vehicles_Complying': total_vehicles_complying, + 'Vehicles_Violating': violations, + 'Complying Tracks': complying_track_ids, + 'Violating Tracks': violating_track_ids, + 'Compliance Rate': (total_vehicles - violations) / total_vehicles if total_vehicles > 0 else 1 + }) + + results_df = pd.DataFrame(results) + + + if not results_df.empty: + overall_compliance_rate = (results_df['Total_Vehicles'].sum() - results_df['Vehicles_Violating'].sum()) / results_df['Total_Vehicles'].sum() + overall_summary = pd.DataFrame([{'Start_of_Red_Light': 'Overall', 'Beginning_of_Not_Red': '', 'Total_Vehicles': results_df['Total_Vehicles'].sum(), 'Vehicles_Violating': results_df['Vehicles_Violating'].sum(), 'Compliance Rate': overall_compliance_rate}]) + results_df = pd.concat([results_df, overall_summary], ignore_index=True) + + return results_df + +csv_path = '/home/marya1/Documents/MnDoTNRToR/inference/final_csv' +output_path='/home/marya1/Documents/MnDoTNRToR/inference/detect_violations_trial' +videos_path = '/home/marya1/Documents/MnDoTNRToR/videos/processing' + +video_files = glob(join(videos_path, '*.mp4')) + +for video_path in video_files: + cap = cv2.VideoCapture(video_path) + ret, first_frame = cap.read() + if not ret: + print(f"Failed to read video: {video_path}") + continue + + # Reset RoI selection variables + drawing = False + roi_selected = False + top_left_pt, bottom_right_pt = None, None + + cv2.namedWindow('Frame') + cv2.setMouseCallback('Frame', draw_roi) + print("Draw a bounding box around the area of interest and press 'Enter'.") + + while True: + frame_copy = first_frame.copy() + if roi_selected: + cv2.rectangle(frame_copy, top_left_pt, bottom_right_pt, (0, 255, 0), 2) + cv2.imshow('Frame', frame_copy) + key = cv2.waitKey(1) & 0xFF + if key == 13: # Enter key is pressed + break + + if not roi_selected: + print(f"RoI not selected for video: {video_path}. 
Skipping...") + cv2.destroyAllWindows() + continue + + roi = (top_left_pt[0], top_left_pt[1], bottom_right_pt[0], bottom_right_pt[1]) + cv2.destroyAllWindows() + cap.release() + + # Proceed with analysis using selected RoI + csv_file = join(csv_path, splitext(basename(video_path))[0] + '.csv') + analysis_results = count_vehicles_and_violations(csv_file, roi) + output_file = join(output_path, splitext(basename(video_path))[0] + '_violations.csv') + analysis_results.to_csv(output_file, index=False) + print(f"Analysis saved to {output_file}") \ No newline at end of file diff --git a/4_detect_violations_final.py b/4_detect_violations_final.py new file mode 100644 index 0000000..76654a4 --- /dev/null +++ b/4_detect_violations_final.py @@ -0,0 +1,220 @@ +import cv2 +import pandas as pd +from os.path import join, basename, splitext +from glob import glob +from sklearn.cluster import DBSCAN +import numpy as np + +csv_path = '/home/marya1/Documents/MnDoTNRToR/inference/final_csv' +output_path='/home/marya1/Documents/MnDoTNRToR/inference/detect_violations' +videos_path = '/home/marya1/Documents/MnDoTNRToR/videos/processing' +output_file_path='/home/marya1/Documents/MnDoTNRToR/inference/detect_violations_final' + +# Define global variables for ROI drawing +drawing = False +roi_selected = False +top_left_pt, bottom_right_pt = None, None + +def draw_roi(event, x, y, flags, param): + global drawing, roi_selected, top_left_pt, bottom_right_pt + if event == cv2.EVENT_LBUTTONDOWN: + drawing = True + top_left_pt = (x, y) + elif event == cv2.EVENT_LBUTTONUP: + drawing = False + bottom_right_pt = (x, y) + roi_selected = True + +def is_within_roi(xmin, ymin, xmax, ymax, roi): + roi_xmin, roi_ymin, roi_xmax, roi_ymax = roi + return xmin >= roi_xmin and xmax <= roi_xmax and ymin >= roi_ymin and ymax <= roi_ymax + +def calculate_speed(tracks_df): + tracks_df['x_center'] = (tracks_df['xmin'] + tracks_df['xmax']) / 2 + tracks_df['y_center'] = (tracks_df['ymin'] + tracks_df['ymax']) / 2 + tracks_df.sort_values(by=['Track', 'DateTime'], inplace=True) + tracks_df['x_diff'] = tracks_df.groupby('Track')['x_center'].diff() + tracks_df['y_diff'] = tracks_df.groupby('Track')['y_center'].diff() + tracks_df['time_diff'] = tracks_df.groupby('Track')['DateTime'].diff().dt.total_seconds() + tracks_df['speed'] = ((tracks_df['x_diff']**2 + tracks_df['y_diff']**2)**0.5) / tracks_df['time_diff'] + tracks_df['speed'].fillna(0, inplace=True) + return tracks_df + +def vehicle_exited_roi(tracks_df, roi, buffer_distance=10): + def did_exit(row): + roi_xmin, roi_ymin, roi_xmax, roi_ymax = roi + near_exit = row['xmax'] > roi_xmax - buffer_distance or row['ymax'] > roi_ymax - buffer_distance or row['xmin'] < roi_xmin + buffer_distance or row['ymin'] < roi_ymin + buffer_distance + likely_to_exit = row['speed'] > 0 and near_exit + return likely_to_exit + + # Create a copy to avoid SettingWithCopyWarning when modifying + modified_df = tracks_df.copy() + + # Ensure 'Exited_ROI' column exists to avoid issues with assigning to a non-existent column + if 'Exited_ROI' not in modified_df.columns: + modified_df['Exited_ROI'] = False + + # Get the last row index of each group + last_row_indices = modified_df.groupby('Track').tail(1).index + + # Use .loc to update the 'Exited_ROI' column for these indices + modified_df.loc[last_row_indices, 'Exited_ROI'] = modified_df.loc[last_row_indices].apply(did_exit, axis=1) + + return modified_df + + +def count_vehicles_and_violations(csv_file, roi, valid_classes=[1, 2, 3, 5, 7], min_frames=60): + df = 
pd.read_csv(csv_file) + df['DateTime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], errors='coerce') + + df = calculate_speed(df) # Calculate speed for the df + + red_light_phases = df[df['Color'] == 'Red'].groupby((df['Color'] != df['Color'].shift()).cumsum()) + results = [] + + for _, phase in red_light_phases: + start_of_red_light = phase['DateTime'].iloc[0] + next_phase_start = phase['DateTime'].iloc[-1] + pd.Timedelta(seconds=1) + + if (next_phase_start - start_of_red_light).total_seconds() <= 1: + continue + + tracks_in_roi = df[(df['DateTime'] >= start_of_red_light) & (df['DateTime'] < next_phase_start)] + tracks_in_roi = tracks_in_roi[tracks_in_roi.apply(lambda row: is_within_roi(row['xmin'], row['ymin'], row['xmax'], row['ymax'], roi), axis=1)] + valid_tracks_in_roi = tracks_in_roi[tracks_in_roi['Class_ID'].isin(valid_classes)] + + valid_tracks_in_roi = vehicle_exited_roi(valid_tracks_in_roi, roi) # Check for exits + + frame_counts = valid_tracks_in_roi.groupby('Track').size() + tracks_more_than_min_frames = frame_counts[frame_counts >= min_frames].index + valid_tracks_in_roi = valid_tracks_in_roi[valid_tracks_in_roi['Track'].isin(tracks_more_than_min_frames)] + + exiting_tracks = valid_tracks_in_roi[valid_tracks_in_roi['Exited_ROI'] == True].groupby('Track').last() + exiting_tracks = exiting_tracks[exiting_tracks.index.isin(tracks_more_than_min_frames)] + violations = exiting_tracks[exiting_tracks['Color'] == 'Red'].index.nunique() + violating_track_ids = exiting_tracks[exiting_tracks['Color'] == 'Red'].index.unique().tolist() + total_vehicles = valid_tracks_in_roi['Track'].nunique() + + total_vehicles_complying = valid_tracks_in_roi['Track'].nunique() - violations + complying_track_ids = valid_tracks_in_roi[~valid_tracks_in_roi['Track'].isin(violating_track_ids)]['Track'].unique().tolist() + + results.append({ + 'Start_of_Red_Light': start_of_red_light.strftime('%Y-%m-%d %H:%M:%S'), + 'Beginning_of_Not_Red': next_phase_start.strftime('%Y-%m-%d %H:%M:%S'), + 'Total_Vehicles': total_vehicles, + 'Vehicles_Complying': total_vehicles_complying, + 'Vehicles_Violating': violations, + 'Complying Tracks': complying_track_ids, + 'Violating Tracks': violating_track_ids, + 'Compliance Rate': (total_vehicles - violations) / total_vehicles if total_vehicles > 0 else 1 + }) + + results_df = pd.DataFrame(results) + if not results_df.empty: + results_df = pd.concat([results_df, pd.DataFrame([{'Start_of_Red_Light': 'Overall', 'Total_Vehicles': results_df['Total_Vehicles'].sum(), 'Vehicles_Violating': results_df['Vehicles_Violating'].sum(), 'Compliance Rate': results_df['Compliance Rate'].mean()}])], ignore_index=True) + + return results_df + +def process_csv_with_time_condition(input_file_path, output_file_path): + """ + Processes a CSV file by sorting based on 'Start_of_Red_Light', then iterates + through each row to keep rows where 'Start_of_Red_Light' or 'Beginning_of_Not_Red' + in the next row is not the same as in the current row, and if the time difference + between them is exactly 1 second. The first row is always kept. + + Parameters: + - input_file_path: Path to the input CSV file. + - output_file_path: Path where the processed CSV file will be saved. 
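+
+    Note (summary of the checks performed in the loop below, added for clarity): a
+    row is kept when its red phase lasts at least 10 seconds and both its
+    'Start_of_Red_Light' and 'Beginning_of_Not_Red' differ by more than 5 seconds
+    from the immediately preceding row in sorted order; the first row is always kept.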
+ """ + # Load the CSV file into a DataFrame + df = pd.read_csv(input_file_path) + + # Convert columns to datetime, coercing errors to NaT + df['Start_of_Red_Light'] = pd.to_datetime(df['Start_of_Red_Light'], errors='coerce') + df['Beginning_of_Not_Red'] = pd.to_datetime(df['Beginning_of_Not_Red'], errors='coerce') + + # Filter out rows where datetime conversion was unsuccessful + df = df.dropna(subset=['Start_of_Red_Light', 'Beginning_of_Not_Red']) + + # Sort the DataFrame based on the 'Start_of_Red_Light' column + df_sorted = df.sort_values(by='Start_of_Red_Light') + + # Initialize a list to keep rows that meet the criteria, starting with the first row + rows_to_keep = [0] + + # Iterate through the sorted DataFrame starting from the second row + for i in range(1, len(df_sorted)): + previous_row = df_sorted.iloc[i - 1] + current_row = df_sorted.iloc[i] + + # Calculate time differences + diff_start = abs((current_row['Start_of_Red_Light'] - previous_row['Start_of_Red_Light']).total_seconds()) + diff_end = abs((current_row['Beginning_of_Not_Red'] - previous_row['Beginning_of_Not_Red']).total_seconds()) + duration=abs((current_row['Beginning_of_Not_Red']-current_row['Start_of_Red_Light']).total_seconds()) + + # Determine if the current row should be kept based on the conditions + if (duration>=10): + if not(diff_start <= 5 or diff_end <= 5): + rows_to_keep.append(i) + + # Filter the DataFrame based on the calculated rows to keep + df_filtered = df_sorted.iloc[rows_to_keep] + + # Save the filtered DataFrame to the output CSV file + #df_filtered.to_csv(output_file_path, index=False) + results_df_final = pd.DataFrame(df_filtered) + if not results_df_final.empty: + results_df_final = pd.concat([results_df_final, pd.DataFrame([{'Start_of_Red_Light': 'Overall', 'Total_Vehicles': results_df_final['Total_Vehicles'].sum(), 'Vehicles_Violating': results_df_final['Vehicles_Violating'].sum(), 'Compliance Rate': results_df_final['Compliance Rate'].mean()}])], ignore_index=True) + results_df_final.to_csv(output_file_path, index=False) + + print(f"Processed file has been saved to: {output_file_path}") + +video_files = glob(join(videos_path, '*.mp4')) + +for video_path in video_files: + cap = cv2.VideoCapture(video_path) + ret, first_frame = cap.read() + if not ret: + print(f"Failed to read video: {video_path}") + continue + + # Reset RoI selection variables + drawing = False + roi_selected = False + top_left_pt, bottom_right_pt = None, None + + cv2.namedWindow('Frame') + cv2.setMouseCallback('Frame', draw_roi) + print("Draw a bounding box around the area of interest and press 'Enter'.") + + while True: + frame_copy = first_frame.copy() + if roi_selected: + cv2.rectangle(frame_copy, top_left_pt, bottom_right_pt, (0, 255, 0), 2) + cv2.imshow('Frame', frame_copy) + key = cv2.waitKey(1) & 0xFF + if key == 13: # Enter key is pressed + break + + if not roi_selected: + print(f"RoI not selected for video: {video_path}. 
Skipping...") + cv2.destroyAllWindows() + continue + + roi = (top_left_pt[0], top_left_pt[1], bottom_right_pt[0], bottom_right_pt[1]) + cv2.destroyAllWindows() + cap.release() + + # Proceed with analysis using selected RoI + csv_file = join(csv_path, splitext(basename(video_path))[0] + '.csv') + analysis_results = count_vehicles_and_violations(csv_file, roi) + output_file = join(output_path, splitext(basename(video_path))[0] + '_violations_final_test.csv') + output_file_final = join(output_file_path, splitext(basename(video_path))[0] + '_violations.csv') + analysis_results.to_csv(output_file, index=False) + print(f"Analysis saved to {output_file}") + process_csv_with_time_condition(output_file, output_file_final) + + + + + diff --git a/README.md b/README.md index fab8153..899e647 100644 --- a/README.md +++ b/README.md @@ -1,57 +1,18 @@ -# Roboflow Object Tracking Example - -Object tracking using Roboflow Inference API and Zero-Shot (CLIP) Deep SORT. Read more in our -[Zero-Shot Object Tracking announcement post](https://blog.roboflow.com/zero-shot-object-tracking/). - -![Example fish tracking](https://user-images.githubusercontent.com/870796/130703648-8af62801-d66c-41f5-80ae-889301ae9b44.gif) - -Example object tracking courtesy of the [Roboflow Universe public Aquarium model and dataset](https://universe.roboflow.com/brad-dwyer/aquarium-combined). You can adapt this to your own dataset on Roboflow or any pre-trained model from [Roboflow Universe](https://universe.roboflow.com). - -# Overview - -Object tracking involves following individual objects of interest across frames. It -combines the output of an [object detection](https://blog.roboflow.com/object-detection) model -with a secondary algorithm to determine which detections are identifying "the same" -object over time. - -Previously, this required training a special classification model to differentiate -the instances of each different class. In this repository, we have used -[OpenAI's CLIP zero-shot image classifier](https://blog.roboflow.com/clip-model-eli5-beginner-guide/) -to create a universal object tracking repository. All you need is a trained object -detection model and CLIP handles the instance identification for the object tracking -algorithm. - -# Getting Started - -Colab Tutorial Here: - -Open In Colab - -## Training your model - -To use the Roboflow Inference API as your detection engine: - -Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train). -Your model will be hosted on an inference URL. - -To use YOLOv7 as your detection engine: - -Follow Roboflow's [Train YOLOv7 on Custom Data Tutorial](https://blog.roboflow.com/yolov7-custom-dataset-training-tutorial/) - -The YOLOv7 implementation uses [this colab notebook](https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa) - -To use YOLOv5 as your detection engine: +## Performing Object Tracking -Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/) +#### Install Anaconda or miniconda -The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ) +### Create a virtual environment -The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6` +conda create -n myenv +e.g. conda create -n myenv mndot -## Performing Object Tracking +### Activate virtual environment -### Clone repositories +conda activate +e.g. 
conda activate mndot +### Clone repositories in a single folder ``` git clone https://github.com/roboflow-ai/zero-shot-object-tracking cd zero-shot-object-tracking @@ -59,60 +20,43 @@ git clone https://github.com/openai/CLIP.git CLIP-repo cp -r ./CLIP-repo/clip ./clip // Unix based robocopy CLIP-repo/clip clip\ // Windows ``` - ### Install requirements (python 3.7+) ```bash pip install --upgrade pip pip install -r requirements.txt +conda install -c conda-forge scipy ``` ### Install requirements (anaconda python 3.8) ``` conda install pytorch torchvision torchaudio -c pytorch conda install ftfy regex tqdm requests pandas seaborn +conda install -c conda-forge scipy pip install opencv pycocotools tensorflow -``` - -### Run with Roboflow -```bash - -python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.roboflow.com/playing-cards-ow27d/1 --api_key ROBOFLOW_API_KEY --info ``` +### Download YOLOv7 weights from -**NOTE you must provide a valid API key from [Roboflow](docs.roboflow.com) +https://github.com/WongKinYiu/yolov7?tab=readme-ov-file ### Run with YOLOv7 ```bash +python clip_object_tracker.py --weights models/yolov7.pt --source data/video/fish.mp4 --detection-engine yolov7 --info -python clip_object_tracker.py --weights models/yolov7.pt --source data/video/fish.mp4 --detection-engine yolov7 --info -``` +python3 clip_object_tracker_bbox_only_separate_columns.py --weights --conf 0.5 --save-txt --save-conf --name --source --detection-engine yolov7 --info -### Run with YOLOv5 -```bash +e.g. python3 clip_object_tracker_bbox_only_separate_columns.py --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf --name 10.2_PM_NTOR --source /home/marya/Desktop/zero-shot-object-tracking/data/video/10.2_PM_NTOR.mp4 --detection-engine yolov7 --info -python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info ``` +### THE DETECTIONS ARE ALWAYS SAVED IN THE /runs/detect FOLDER FOLLOWED BY THE PATH NAME THAT YOU HAVE SPECIFIED. YOU CAN ALSO SPECIFY OTHER PARAMETERS BY LOOKING AT THE VARIOUS ARGUMENTS THAT CAN BE PASSED IN THE clip_object_tracker_bbox_only_separate_columns.py FILE OR AS GIVEN BELOW -### Run with YOLOv4 -To use YOLOv4 for object detection you will need pretrained weights (.weights file), a model config for your weights (.cfg), and a class names file (.names). Test weights can be found here https://github.com/AlexeyAB/darknet. [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) ``` -python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info ``` -(by default, output will be in runs/detect/exp[num]) +### In case you want to loop over videos in a folder, the bash files can be run by updating the path where all the video files are stored using the following command -
- -
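+For reference, a minimal loop of this kind might look like the sketch below (the video folder is a placeholder, and the weights file and script name are taken from the example above -- adapt them to your setup):
+
+```bash
+#!/usr/bin/env bash
+# Run the tracker on every .mp4 in a folder, naming each run after the video file.
+VIDEO_DIR=/path/to/videos   # update to the folder containing the video files
+for f in "$VIDEO_DIR"/*.mp4; do
+    name=$(basename "$f" .mp4)
+    python3 clip_object_tracker_bbox_only_separate_columns.py \
+        --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf \
+        --name "$name" --source "$f" --detection-engine yolov7 --info
+done
+```
+
+The provided helper can then be invoked as:
+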
+bash run_all.py -Help - -```bash -python clip_object_tracker.py -h -``` ``` --weights WEIGHTS [WEIGHTS ...] model.pt path(s) --source SOURCE source (video/image) diff --git a/clip_object_tracker_bbox_only.py b/clip_object_tracker_bbox_only.py new file mode 100644 index 0000000..bda8a41 --- /dev/null +++ b/clip_object_tracker_bbox_only.py @@ -0,0 +1,368 @@ +import argparse +import time +from pathlib import Path + +import clip + +import cv2 +import torch +import torch.backends.cudnn as cudnn +from numpy import random +import numpy as np + +from models.experimental import attempt_load +from utils.datasets import LoadStreams, LoadImages +from utils.general import xyxy2xywh, xywh2xyxy, \ + strip_optimizer, set_logging, increment_path, scale_coords +from utils.plots import plot_one_box +from utils.torch_utils import select_device, time_synchronized +from utils.roboflow import predict_image + +# deep sort imports +from deep_sort import preprocessing, nn_matching +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker +from tools import generate_clip_detections as gdet + +from utils.yolov5 import Yolov5Engine +from utils.yolov4 import Yolov4Engine +from utils.yolov7 import Yolov7Engine + +classes = [] + +names = [] + + +def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn): + if len(tracker.tracks): + print("[Tracks]", len(tracker.tracks)) + + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + xyxy = track.to_tlbr() + class_num = track.class_num + bbox = xyxy + class_name = names[int(class_num)] if opt.detection_engine == "yolov5" or "yolov7" else class_num + if opt.info: + print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format( + str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) + + if save_txt: # Write to file + with open(txt_path + '.txt', 'a') as f: + f.write('frame: {}; track: {}; class: {}; xmin: {:.0f}, ymin:{:.0f}, xmax:{:.0f}, ymax:{:.0f}:;\n'.format(frame_count, track.track_id, class_num, + int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) + if save_img or view_img: # Add bbox to image + label = f'{class_name} #{track.track_id}' + plot_one_box(xyxy, im0, label=label, + color=get_color_for(label), line_thickness=opt.thickness) + +def get_color_for(class_num): + colors = [ + "#4892EA", + "#00EEC3", + "#FE4EF0", + "#F4004E", + "#FA7200", + "#EEEE17", + "#90FF00", + "#78C1D2", + "#8C29FF" + ] + + num = hash(class_num) # may actually be a number or a string + hex = colors[num%len(colors)] + + # adapted from https://stackoverflow.com/questions/29643352/converting-hex-to-rgb-value-in-python + rgb = tuple(int(hex.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) + + return rgb + +def detect(save_img=False): + + t0 = time_synchronized() + + nms_max_overlap = opt.nms_max_overlap + max_cosine_distance = opt.max_cosine_distance + nn_budget = opt.nn_budget + + # initialize deep sort + model_filename = "ViT-B/16" + device = "cuda" if torch.cuda.is_available() else "cpu" + half = device != "cpu" + model, transform = clip.load(model_filename, device=device, jit=False) + model.eval() + encoder = gdet.create_box_encoder(model, transform, batch_size=1, device=device) + # calculate cosine distance metric + metric = nn_matching.NearestNeighborDistanceMetric( + "cosine", max_cosine_distance, nn_budget) + + # load yolov5 model here + if opt.detection_engine == "yolov5": + yolov5_engine = Yolov5Engine(opt.weights, device, 
opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + global names + names = yolov5_engine.get_names() + # load yolov7 model here + elif opt.detection_engine == "yolov7": + yolov7_engine = Yolov7Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + names = yolov7_engine.get_names() + elif opt.detection_engine == "yolov4": + yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + + # initialize tracker + tracker = Tracker(metric) + + source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size + webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( + ('rtsp://', 'rtmp://', 'http://')) + + # Directories + save_dir = Path(increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, + exist_ok=True) # make dir + + # Initialize + set_logging() + device = select_device(opt.device) + half = device.type != 'cpu' # half precision only supported on CUDA + + # Set Dataloader + vid_path, vid_writer = None, None + if webcam: + view_img = True + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz) + else: + save_img = True + dataset = LoadImages(source, img_size=imgsz) + + frame_count = 0 + img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img + if opt.detection_engine == "yolov5": + _ = yolov5_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + elif opt.detection_engine == "yolov7": + _ = yolov7_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + for path, img, im0s, vid_cap in dataset: + + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + if img.ndimension() == 3: + img = img.unsqueeze(0) + + # Roboflow Inference + t1 = time_synchronized() + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) + + # choose between prediction engines (roboflow, yolov5, and yolov7) + if opt.detection_engine == "roboflow": + pred, classes = predict_image(im0, opt.api_key, opt.url, opt.confidence, opt.overlap, frame_count) + pred = [torch.tensor(pred)] + elif opt.detection_engine == "yolov5": + print("yolov5 inference") + pred = yolov5_engine.infer(img) + elif opt.detection_engine == "yolov7": + print("yolov7 inference") + pred = yolov7_engine.infer(img) + else: + print("yolov4 inference {}".format(im0.shape)) + pred = yolov4_engine.infer(im0) + pred, classes = yolov4_engine.postprocess(pred, im0.shape) + pred = [torch.tensor(pred)] + + t2 = time_synchronized() + # Process detections + for i, det in enumerate(pred): # detections per image + #moved up to roboflow inference + """if webcam: # batch_size >= 1 + p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( + ), dataset.count + else: + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)""" + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # img.jpg + txt_path = str(save_dir / 'labels' / p.stem) + \ + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt + + # normalization gain whwh + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] + if len(det): + + print("\n[Detections]") + if opt.detection_engine == "roboflow": + # Print results + clss = 
np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + elif opt.detection_engine == "yolov4": + + # Print results + # Rescale boxes from img_size to im0 size + #det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round() + clss = np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + """for idx, box in enumerate(bboxes): + plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx], + color=get_color_for(classes[idx]), line_thickness=opt.thickness)""" + + print(s) + else: + + # Print results + # Rescale boxes from img_size to im0 size + + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f'{n} {names[int(c)]}s, ' # add to string + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + trans_bboxes[:, 2:] -= trans_bboxes[:, :2] + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + class_nums = det[:, -1].cpu() + classes = class_nums + + print(s) + + + + # encode yolo detections and feed to tracker + features = encoder(im0, bboxes) + detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip( + bboxes, confs, classes, features)] + + # run non-maxima supression + boxs = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + class_nums = np.array([d.class_num for d in detections]) + indices = preprocessing.non_max_suppression( + boxs, class_nums, nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # Call the tracker + tracker.predict() + tracker.update(detections) + + # update tracks + update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn) + + # Print time (inference + NMS) + print(f'Done. ({t2 - t1:.3f}s)') + + # Stream results + if view_img: + cv2.imshow(str(p), im0) + if cv2.waitKey(1) == ord('q'): # q to quit + raise StopIteration + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' + if vid_path != save_path: # new video + vid_path = save_path + if isinstance(vid_writer, cv2.VideoWriter): + vid_writer.release() # release previous video writer + + fourcc = 'mp4v' # output video codec + #fps = vid_cap.get(cv2.CAP_PROP_FPS) + fps=1 + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) + vid_writer.write(im0) + + frame_count = frame_count+1 + + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + print(f"Results saved to {save_dir}{s}") + + print(f'Done. 
({time.time() - t0:.3f}s)') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, + default='yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--cfg', type=str, + default='yolov4.cfg', help='yolov4 model cfg file path') + parser.add_argument('--names', type=str, + default='coco.names', help='yolov4 names file, file path') + # file/folder, 0 for webcam + parser.add_argument('--source', type=str, + default='data/images', help='source') + parser.add_argument('--img-size', type=int, default=640, + help='inference size (pixels)') + parser.add_argument('--confidence', type=float, + default=0.40, help='object confidence threshold') + parser.add_argument('--overlap', type=float, + default=0.30, help='IOU threshold for NMS') + parser.add_argument('--thickness', type=int, + default=3, help='Thickness of the bounding box strokes') + parser.add_argument('--device', default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', + help='display results') + parser.add_argument('--save-txt', action='store_true', + help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--classes', nargs='+', type=int, + help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', + help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', + help='augmented inference') + parser.add_argument('--update', action='store_true', + help='update all models') + parser.add_argument('--project', default='runs/detect', + help='save results to project/name') + parser.add_argument('--name', default='exp', + help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--nms_max_overlap', type=float, default=1.0, + help='Non-maxima suppression threshold: Maximum detection overlap.') + parser.add_argument('--max_cosine_distance', type=float, default=0.4, + help='Gating threshold for cosine distance metric (object appearance).') + parser.add_argument('--nn_budget', type=int, default=None, + help='Maximum size of the appearance descriptors allery. 
If None, no budget is enforced.') + parser.add_argument('--api_key', default=None, + help='Roboflow API Key.') + parser.add_argument('--url', default=None, + help='Roboflow Model URL.') + parser.add_argument('--info', action='store_true', + help='Print debugging info.') + parser.add_argument("--detection-engine", default="roboflow", help="Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow).") + opt = parser.parse_args() + print(opt) + + with torch.no_grad(): + if opt.update: # update all models (to fix SourceChangeWarning) + for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: + detect() + strip_optimizer(opt.weights) + else: + detect() diff --git a/clip_object_tracker_bbox_only_1fps.py b/clip_object_tracker_bbox_only_1fps.py new file mode 100644 index 0000000..37dabe0 --- /dev/null +++ b/clip_object_tracker_bbox_only_1fps.py @@ -0,0 +1,371 @@ +import argparse +import time +from pathlib import Path + +import clip + +import cv2 +import torch +import torch.backends.cudnn as cudnn +from numpy import random +import numpy as np + +from models.experimental import attempt_load +from utils.datasets import LoadStreams, LoadImages +from utils.general import xyxy2xywh, xywh2xyxy, \ + strip_optimizer, set_logging, increment_path, scale_coords +from utils.plots import plot_one_box +from utils.torch_utils import select_device, time_synchronized +from utils.roboflow import predict_image + +# deep sort imports +from deep_sort import preprocessing, nn_matching +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker +from tools import generate_clip_detections as gdet + +from utils.yolov5 import Yolov5Engine +from utils.yolov4 import Yolov4Engine +from utils.yolov7 import Yolov7Engine + +classes = [] + +names = [] + + +def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn): + if len(tracker.tracks): + print("[Tracks]", len(tracker.tracks)) + + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + xyxy = track.to_tlbr() + class_num = track.class_num + bbox = xyxy + class_name = names[int(class_num)] if opt.detection_engine == "yolov5" or "yolov7" else class_num + if opt.info: + print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format( + str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) + + if save_txt: # Write to file + with open(txt_path + '.txt', 'a') as f: + f.write('frame: {}; track: {}; class: {}; xmin: {:.0f}, ymin:{:.0f}, xmax:{:.0f}, ymax:{:.0f}:;\n'.format(frame_count, track.track_id, class_num, + int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) + if save_img or view_img: # Add bbox to image + label = f'{class_name} #{track.track_id}' + plot_one_box(xyxy, im0, label=label, + color=get_color_for(label), line_thickness=opt.thickness) + +def get_color_for(class_num): + colors = [ + "#4892EA", + "#00EEC3", + "#FE4EF0", + "#F4004E", + "#FA7200", + "#EEEE17", + "#90FF00", + "#78C1D2", + "#8C29FF" + ] + + num = hash(class_num) # may actually be a number or a string + hex = colors[num%len(colors)] + + # adapted from https://stackoverflow.com/questions/29643352/converting-hex-to-rgb-value-in-python + rgb = tuple(int(hex.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) + + return rgb + +def detect(save_img=False): + + t0 = time_synchronized() + + nms_max_overlap = opt.nms_max_overlap + max_cosine_distance = opt.max_cosine_distance + nn_budget = opt.nn_budget + + # initialize 
deep sort + model_filename = "ViT-B/16" + device = "cuda" if torch.cuda.is_available() else "cpu" + half = device != "cpu" + model, transform = clip.load(model_filename, device=device, jit=False) + model.eval() + encoder = gdet.create_box_encoder(model, transform, batch_size=1, device=device) + # calculate cosine distance metric + metric = nn_matching.NearestNeighborDistanceMetric( + "cosine", max_cosine_distance, nn_budget) + + # load yolov5 model here + if opt.detection_engine == "yolov5": + yolov5_engine = Yolov5Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + global names + names = yolov5_engine.get_names() + # load yolov7 model here + elif opt.detection_engine == "yolov7": + yolov7_engine = Yolov7Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + names = yolov7_engine.get_names() + elif opt.detection_engine == "yolov4": + yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + + # initialize tracker + tracker = Tracker(metric) + + source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size + webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( + ('rtsp://', 'rtmp://', 'http://')) + + # Directories + save_dir = Path(increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, + exist_ok=True) # make dir + + # Initialize + set_logging() + device = select_device(opt.device) + half = device.type != 'cpu' # half precision only supported on CUDA + + # Set Dataloader + vid_path, vid_writer = None, None + if webcam: + view_img = True + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz) + else: + save_img = True + dataset = LoadImages(source, img_size=imgsz) + + frame_count = 0 + #frame_idx = 0 + img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img + if opt.detection_engine == "yolov5": + _ = yolov5_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + elif opt.detection_engine == "yolov7": + _ = yolov7_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + + for path, img, im0s, vid_cap in dataset: + video_frame_rate = vid_cap.get(cv2.CAP_PROP_FPS) # Replace YOUR_VIDEO_FRAME_RATE_HERE with the actual frame rate + frames_per_second = video_frame_rate + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + if img.ndimension() == 3: + img = img.unsqueeze(0) + + # Roboflow Inference + t1 = time_synchronized() + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) + + # choose between prediction engines (roboflow, yolov5, and yolov7) + if opt.detection_engine == "roboflow": + pred, classes = predict_image(im0, opt.api_key, opt.url, opt.confidence, opt.overlap, frame_count) + pred = [torch.tensor(pred)] + elif opt.detection_engine == "yolov5": + print("yolov5 inference") + pred = yolov5_engine.infer(img) + elif opt.detection_engine == "yolov7": + print("yolov7 inference") + pred = yolov7_engine.infer(img) + else: + print("yolov4 inference {}".format(im0.shape)) + pred = yolov4_engine.infer(im0) + pred, classes = yolov4_engine.postprocess(pred, im0.shape) + pred = 
[torch.tensor(pred)] + + t2 = time_synchronized() + # Process detections + for i, det in enumerate(pred): # detections per image + #moved up to roboflow inference + """if webcam: # batch_size >= 1 + p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( + ), dataset.count + else: + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)""" + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # img.jpg + txt_path = str(save_dir / 'labels' / p.stem) + \ + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt + + # normalization gain whwh + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] + if len(det): + + print("\n[Detections]") + if opt.detection_engine == "roboflow": + # Print results + clss = np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + elif opt.detection_engine == "yolov4": + + # Print results + # Rescale boxes from img_size to im0 size + #det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round() + clss = np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + """for idx, box in enumerate(bboxes): + plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx], + color=get_color_for(classes[idx]), line_thickness=opt.thickness)""" + + print(s) + else: + + # Print results + # Rescale boxes from img_size to im0 size + + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f'{n} {names[int(c)]}s, ' # add to string + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + trans_bboxes[:, 2:] -= trans_bboxes[:, :2] + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + class_nums = det[:, -1].cpu() + classes = class_nums + + print(s) + + + + # encode yolo detections and feed to tracker + features = encoder(im0, bboxes) + detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip( + bboxes, confs, classes, features)] + + # run non-maxima supression + boxs = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + class_nums = np.array([d.class_num for d in detections]) + indices = preprocessing.non_max_suppression( + boxs, class_nums, nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + + # Call the tracker + tracker.predict() + tracker.update(detections) + if frame_count % frames_per_second == 0: + update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn) + + # Print time (inference + NMS) + print(f'Done. 
({t2 - t1:.3f}s)') + + # Stream results + if view_img: + cv2.imshow(str(p), im0) + if cv2.waitKey(1) == ord('q'): # q to quit + raise StopIteration + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' + if vid_path != save_path: # new video + vid_path = save_path + if isinstance(vid_writer, cv2.VideoWriter): + vid_writer.release() # release previous video writer + + fourcc = 'mp4v' # output video codec + fps = vid_cap.get(cv2.CAP_PROP_FPS) + fps_1=fps/60 + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*fourcc), fps_1, (w, h)) + vid_writer.write(im0) + + frame_count = frame_count+1 + + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + print(f"Results saved to {save_dir}{s}") + + print(f'Done. ({time.time() - t0:.3f}s)') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, + default='yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--cfg', type=str, + default='yolov4.cfg', help='yolov4 model cfg file path') + parser.add_argument('--names', type=str, + default='coco.names', help='yolov4 names file, file path') + # file/folder, 0 for webcam + parser.add_argument('--source', type=str, + default='data/images', help='source') + parser.add_argument('--img-size', type=int, default=640, + help='inference size (pixels)') + parser.add_argument('--confidence', type=float, + default=0.40, help='object confidence threshold') + parser.add_argument('--overlap', type=float, + default=0.30, help='IOU threshold for NMS') + parser.add_argument('--thickness', type=int, + default=3, help='Thickness of the bounding box strokes') + parser.add_argument('--device', default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', + help='display results') + parser.add_argument('--save-txt', action='store_true', + help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--classes', nargs='+', type=int, + help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', + help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', + help='augmented inference') + parser.add_argument('--update', action='store_true', + help='update all models') + parser.add_argument('--project', default='runs/detect', + help='save results to project/name') + parser.add_argument('--name', default='exp', + help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--nms_max_overlap', type=float, default=1.0, + help='Non-maxima suppression threshold: Maximum detection overlap.') + parser.add_argument('--max_cosine_distance', type=float, default=0.4, + help='Gating threshold for cosine distance metric (object appearance).') + parser.add_argument('--nn_budget', type=int, default=None, + help='Maximum size of the appearance descriptors allery. 
If None, no budget is enforced.') + parser.add_argument('--api_key', default=None, + help='Roboflow API Key.') + parser.add_argument('--url', default=None, + help='Roboflow Model URL.') + parser.add_argument('--info', action='store_true', + help='Print debugging info.') + parser.add_argument("--detection-engine", default="roboflow", help="Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow).") + opt = parser.parse_args() + print(opt) + + with torch.no_grad(): + if opt.update: # update all models (to fix SourceChangeWarning) + for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: + detect() + strip_optimizer(opt.weights) + else: + detect() diff --git a/clip_object_tracker_bbox_only_separate_columns.py b/clip_object_tracker_bbox_only_separate_columns.py new file mode 100644 index 0000000..e7dcb43 --- /dev/null +++ b/clip_object_tracker_bbox_only_separate_columns.py @@ -0,0 +1,367 @@ +import argparse +import time +from pathlib import Path + +import clip + +import cv2 +import torch +import torch.backends.cudnn as cudnn +from numpy import random +import numpy as np + +from models.experimental import attempt_load +from utils.datasets import LoadStreams, LoadImages +from utils.general import xyxy2xywh, xywh2xyxy, \ + strip_optimizer, set_logging, increment_path, scale_coords +from utils.plots import plot_one_box +from utils.torch_utils import select_device, time_synchronized +from utils.roboflow import predict_image + +# deep sort imports +from deep_sort import preprocessing, nn_matching +from deep_sort.detection import Detection +from deep_sort.tracker import Tracker +from tools import generate_clip_detections as gdet + +from utils.yolov5 import Yolov5Engine +from utils.yolov4 import Yolov4Engine +from utils.yolov7 import Yolov7Engine + +classes = [] + +names = [] + + +def update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn): + if len(tracker.tracks): + print("[Tracks]", len(tracker.tracks)) + + for track in tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + xyxy = track.to_tlbr() + class_num = track.class_num + bbox = xyxy + class_name = names[int(class_num)] if opt.detection_engine == "yolov5" or "yolov7" else class_num + if opt.info: + print("Tracker ID: {}, Class: {}, BBox Coords (xmin, ymin, xmax, ymax): {}".format( + str(track.track_id), class_name, (int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])))) + + if save_txt: # Write to file + with open(txt_path + '.txt', 'a') as f: + f.write('frame: {}; track: {}; class: {}; class_id: {}; xmin:{:.0f}; ymin:{:.0f}; xmax:{:.0f}; ymax:{:.0f}\n'.format(frame_count, track.track_id, class_name, class_num, + int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]))) + if save_img or view_img: # Add bbox to image + label = f'{class_name} #{track.track_id}' + plot_one_box(xyxy, im0, label=label, + color=get_color_for(label), line_thickness=opt.thickness) + +def get_color_for(class_num): + colors = [ + "#4892EA", + "#00EEC3", + "#FE4EF0", + "#F4004E", + "#FA7200", + "#EEEE17", + "#90FF00", + "#78C1D2", + "#8C29FF" + ] + + num = hash(class_num) # may actually be a number or a string + hex = colors[num%len(colors)] + + # adapted from https://stackoverflow.com/questions/29643352/converting-hex-to-rgb-value-in-python + rgb = tuple(int(hex.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) + + return rgb + +def detect(save_img=False): + + t0 = time_synchronized() + + nms_max_overlap = opt.nms_max_overlap + max_cosine_distance = 
opt.max_cosine_distance + nn_budget = opt.nn_budget + + # initialize deep sort + model_filename = "ViT-B/16" + device = "cuda" if torch.cuda.is_available() else "cpu" + half = device != "cpu" + model, transform = clip.load(model_filename, device=device, jit=False) + model.eval() + encoder = gdet.create_box_encoder(model, transform, batch_size=1, device=device) + # calculate cosine distance metric + metric = nn_matching.NearestNeighborDistanceMetric( + "cosine", max_cosine_distance, nn_budget) + + # load yolov5 model here + if opt.detection_engine == "yolov5": + yolov5_engine = Yolov5Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + global names + names = yolov5_engine.get_names() + # load yolov7 model here + elif opt.detection_engine == "yolov7": + yolov7_engine = Yolov7Engine(opt.weights, device, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + names = yolov7_engine.get_names() + elif opt.detection_engine == "yolov4": + yolov4_engine = Yolov4Engine(opt.weights, opt.cfg, device, opt.names, opt.classes, opt.confidence, opt.overlap, opt.agnostic_nms, opt.augment, half) + + # initialize tracker + tracker = Tracker(metric) + + source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size + webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( + ('rtsp://', 'rtmp://', 'http://')) + + # Directories + save_dir = Path(increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, + exist_ok=True) # make dir + + # Initialize + set_logging() + device = select_device(opt.device) + half = device.type != 'cpu' # half precision only supported on CUDA + + # Set Dataloader + vid_path, vid_writer = None, None + if webcam: + view_img = True + cudnn.benchmark = True # set True to speed up constant image size inference + dataset = LoadStreams(source, img_size=imgsz) + else: + save_img = True + dataset = LoadImages(source, img_size=imgsz) + + frame_count = 0 + img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img + if opt.detection_engine == "yolov5": + _ = yolov5_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + elif opt.detection_engine == "yolov7": + _ = yolov7_engine.infer(img.half() if half else img) if device.type != 'cpu' else None # run once + for path, img, im0s, vid_cap in dataset: + + img = torch.from_numpy(img).to(device) + img = img.half() if half else img.float() # uint8 to fp16/32 + img /= 255.0 # 0 - 255 to 0.0 - 1.0 + if img.ndimension() == 3: + img = img.unsqueeze(0) + + # Roboflow Inference + t1 = time_synchronized() + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) + + # choose between prediction engines (roboflow, yolov5, and yolov7) + if opt.detection_engine == "roboflow": + pred, classes = predict_image(im0, opt.api_key, opt.url, opt.confidence, opt.overlap, frame_count) + pred = [torch.tensor(pred)] + elif opt.detection_engine == "yolov5": + print("yolov5 inference") + pred = yolov5_engine.infer(img) + elif opt.detection_engine == "yolov7": + print("yolov7 inference") + pred = yolov7_engine.infer(img) + else: + print("yolov4 inference {}".format(im0.shape)) + pred = yolov4_engine.infer(im0) + pred, classes = yolov4_engine.postprocess(pred, im0.shape) + pred = [torch.tensor(pred)] + + t2 = time_synchronized() + # Process detections + for i, det in 
enumerate(pred): # detections per image + #moved up to roboflow inference + """if webcam: # batch_size >= 1 + p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( + ), dataset.count + else: + p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)""" + + p = Path(p) # to Path + save_path = str(save_dir / p.name) # img.jpg + txt_path = str(save_dir / 'labels' / p.stem) + \ + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt + + # normalization gain whwh + gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] + if len(det): + + print("\n[Detections]") + if opt.detection_engine == "roboflow": + # Print results + clss = np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + elif opt.detection_engine == "yolov4": + + # Print results + # Rescale boxes from img_size to im0 size + #det[:, :4] = scale_coords([1,1], det[:, :4], im0.shape).round() + clss = np.array(classes) + for c in np.unique(clss): + n = (clss == c).sum() # detections per class + s += f'{n} {c}, ' # add to string + + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + + """for idx, box in enumerate(bboxes): + plot_one_box(xywh2xyxy(torch.tensor(box).view(1, 4))[0], im0, label=classes[idx], + color=get_color_for(classes[idx]), line_thickness=opt.thickness)""" + + print(s) + else: + + # Print results + # Rescale boxes from img_size to im0 size + + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + for c in det[:, -1].unique(): + n = (det[:, -1] == c).sum() # detections per class + s += f'{n} {names[int(c)]}s, ' # add to string + + # Transform bboxes from tlbr to tlwh + trans_bboxes = det[:, :4].clone() + trans_bboxes[:, 2:] -= trans_bboxes[:, :2] + bboxes = trans_bboxes[:, :4].cpu() + confs = det[:, 4] + class_nums = det[:, -1].cpu() + classes = class_nums + + print(s) + + + + # encode yolo detections and feed to tracker + features = encoder(im0, bboxes) + detections = [Detection(bbox, conf, class_num, feature) for bbox, conf, class_num, feature in zip( + bboxes, confs, classes, features)] + + # run non-maxima supression + boxs = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + class_nums = np.array([d.class_num for d in detections]) + indices = preprocessing.non_max_suppression( + boxs, class_nums, nms_max_overlap, scores) + detections = [detections[i] for i in indices] + + # Call the tracker + tracker.predict() + tracker.update(detections) + + # update tracks + update_tracks(tracker, frame_count, save_txt, txt_path, save_img, view_img, im0, gn) + + # Print time (inference + NMS) + print(f'Done. 
({t2 - t1:.3f}s)') + + # Stream results + if view_img: + cv2.imshow(str(p), im0) + if cv2.waitKey(1) == ord('q'): # q to quit + raise StopIteration + + # Save results (image with detections) + if save_img: + if dataset.mode == 'image': + cv2.imwrite(save_path, im0) + else: # 'video' + if vid_path != save_path: # new video + vid_path = save_path + if isinstance(vid_writer, cv2.VideoWriter): + vid_writer.release() # release previous video writer + + fourcc = 'mp4v' # output video codec + fps = vid_cap.get(cv2.CAP_PROP_FPS) + w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) + vid_writer.write(im0) + + frame_count = frame_count+1 + + if save_txt or save_img: + s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' + print(f"Results saved to {save_dir}{s}") + + print(f'Done. ({time.time() - t0:.3f}s)') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--weights', nargs='+', type=str, + default='yolov5s.pt', help='model.pt path(s)') + parser.add_argument('--cfg', type=str, + default='yolov4.cfg', help='yolov4 model cfg file path') + parser.add_argument('--names', type=str, + default='coco.names', help='yolov4 names file, file path') + # file/folder, 0 for webcam + parser.add_argument('--source', type=str, + default='data/images', help='source') + parser.add_argument('--img-size', type=int, default=640, + help='inference size (pixels)') + parser.add_argument('--confidence', type=float, + default=0.40, help='object confidence threshold') + parser.add_argument('--overlap', type=float, + default=0.30, help='IOU threshold for NMS') + parser.add_argument('--thickness', type=int, + default=3, help='Thickness of the bounding box strokes') + parser.add_argument('--device', default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', action='store_true', + help='display results') + parser.add_argument('--save-txt', action='store_true', + help='save results to *.txt') + parser.add_argument('--save-conf', action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--classes', nargs='+', type=int, + help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--agnostic-nms', action='store_true', + help='class-agnostic NMS') + parser.add_argument('--augment', action='store_true', + help='augmented inference') + parser.add_argument('--update', action='store_true', + help='update all models') + parser.add_argument('--project', default='runs/detect', + help='save results to project/name') + parser.add_argument('--name', default='exp', + help='save results to project/name') + parser.add_argument('--exist-ok', action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--nms_max_overlap', type=float, default=1.0, + help='Non-maxima suppression threshold: Maximum detection overlap.') + parser.add_argument('--max_cosine_distance', type=float, default=0.4, + help='Gating threshold for cosine distance metric (object appearance).') + parser.add_argument('--nn_budget', type=int, default=None, + help='Maximum size of the appearance descriptors allery. 
If None, no budget is enforced.') + parser.add_argument('--api_key', default=None, + help='Roboflow API Key.') + parser.add_argument('--url', default=None, + help='Roboflow Model URL.') + parser.add_argument('--info', action='store_true', + help='Print debugging info.') + parser.add_argument("--detection-engine", default="roboflow", help="Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow).") + opt = parser.parse_args() + print(opt) + + with torch.no_grad(): + if opt.update: # update all models (to fix SourceChangeWarning) + for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: + detect() + strip_optimizer(opt.weights) + else: + detect() diff --git a/commands b/commands new file mode 100644 index 0000000..4ba0fe2 --- /dev/null +++ b/commands @@ -0,0 +1,15 @@ +https://github.com/roboflow/zero-shot-object-tracking?ref=blog.roboflow.com + +conda install -c conda-forge scipy + +conda activate mndot_env + +python3 clip_object_tracker.py --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf --name 6-7-13-30 --source /home/marya/Desktop/zero-shot-object-tracking/data/video/YUHAN/NO_TURN_ON_RED_ON_US12_ANALYSIS/PED_ACTIVE/6-7/6-7-13-30.mp4 --detection-engine yolov7 --info + +python3 clip_object_tracker_mod.py --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf --name 6-7-13-30 --source /home/marya/Desktop/zero-shot-object-tracking/data/video/YUHAN/NO_TURN_ON_RED_ON_US12_ANALYSIS/PED_ACTIVE/6-7/6-7-13-30.mp4 --detection-engine yolov7 --info + +python3 clip_object_tracker_bbox_only.py --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf --name 6-7-13-30 --source /home/marya/Desktop/zero-shot-object-tracking/data/video/YUHAN/NO_TURN_ON_RED_ON_US12_ANALYSIS/PED_ACTIVE/6-7/6-7-13-30.mp4 --detection-engine yolov7 --info + +python3 clip_object_tracker_bbox_only_separate_columns.py --weights models/yolov7x.pt --conf 0.5 --save-txt --save-conf --name 10.2_PM_NTOR --source /home/marya/Desktop/zero-shot-object-tracking/data/video/10.2_PM_NTOR.mp4 --detection-engine yolov7 --info + + diff --git a/run_all_GPU0.py b/run_all_GPU0.py new file mode 100644 index 0000000..ec73eb4 --- /dev/null +++ b/run_all_GPU0.py @@ -0,0 +1,24 @@ +import os +import subprocess + +def find_and_process_mp4_files(root_folder): + for root, dirs, files in os.walk(root_folder): + for file in files: + if file.endswith('.mp4'): + mp4_path = os.path.join(root, file) + cmd = [ + 'python3', 'clip_object_tracker_bbox_only_separate_columns.py', + '--weights', 'models/yolov7x.pt', + '--conf', '0.5', + '--save-txt', + '--save-conf', + '--name', os.path.splitext(file)[0], # Name of the mp4 file without extension + '--source', mp4_path, # Path to the mp4 file + '--detection-engine', 'yolov7', + '--info' + ] + print(f"Running command for file: {mp4_path}") + subprocess.run(cmd) + +# Replace 'path_to_folder' with the path to your target folder +find_and_process_mp4_files('/home/marya/Desktop/zero-shot-object-tracking/data/video/US_12_ToD_Analysis') diff --git a/run_all_GPU1.py b/run_all_GPU1.py new file mode 100644 index 0000000..2db75ce --- /dev/null +++ b/run_all_GPU1.py @@ -0,0 +1,25 @@ +import os +import subprocess + +def find_and_process_mp4_files(root_folder): + for root, dirs, files in os.walk(root_folder): + for file in files: + if file.endswith('.mp4'): + mp4_path = os.path.join(root, file) + cmd = [ + 'python3', 'clip_object_tracker_bbox_only_separate_columns.py', + '--weights', 'models/yolov7x.pt', + '--conf', '0.5', + '--save-txt', + '--save-conf', + 
'--name', os.path.splitext(file)[0], # Name of the mp4 file without extension + '--source', mp4_path, # Path to the mp4 file + '--detection-engine', 'yolov7', + '--info' + ] + print(f"Running command for file: {mp4_path}") + subprocess.run(cmd) + +# Replace 'path_to_folder' with the path to your target folder +#find_and_process_mp4_files('/home/marya/Desktop/zero-shot-object-tracking/data/video/I35W_ANALYSIS') +find_and_process_mp4_files('/home/marya/Desktop/zero-shot-object-tracking/data/video/US_12_ToD_Analysis_PM') diff --git a/run_all_GPU2.py b/run_all_GPU2.py new file mode 100644 index 0000000..a4e8ef6 --- /dev/null +++ b/run_all_GPU2.py @@ -0,0 +1,24 @@ +import os +import subprocess + +def find_and_process_mp4_files(root_folder): + for root, dirs, files in os.walk(root_folder): + for file in files: + if file.endswith('.mp4'): + mp4_path = os.path.join(root, file) + cmd = [ + 'python3', 'clip_object_tracker_bbox_only_separate_columns.py', + '--weights', 'models/yolov7x.pt', + '--conf', '0.5', + '--save-txt', + '--save-conf', + '--name', os.path.splitext(file)[0], # Name of the mp4 file without extension + '--source', mp4_path, # Path to the mp4 file + '--detection-engine', 'yolov7', + '--info' + ] + print(f"Running command for file: {mp4_path}") + subprocess.run(cmd) + +# Replace 'path_to_folder' with the path to your target folder +find_and_process_mp4_files('/home/marya/Desktop/zero-shot-object-tracking/data/video/US_12_ToD_Analysis_AM')
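
Note on GPU assignment: the three run_all_GPU*.py launchers differ only in the folder they walk; as written, none of them passes --device to the tracker or sets CUDA_VISIBLE_DEVICES, so each invocation uses whatever default device the tracker happens to select. Below is a minimal sketch, not part of the repository, of how one such launcher could pin its runs to a specific GPU. The GPU index, the folder path, and the added --device argument are illustrative assumptions; everything else mirrors the existing command list.

import os
import subprocess

# Hypothetical GPU index for this launcher (e.g. run_all_GPU1.py would use "1").
GPU_ID = "1"

def find_and_process_mp4_files(root_folder, gpu_id=GPU_ID):
    # Copy of the environment with only the chosen GPU exposed to the child process.
    env = {**os.environ, "CUDA_VISIBLE_DEVICES": gpu_id}
    for root, dirs, files in os.walk(root_folder):
        for file in files:
            if not file.endswith('.mp4'):
                continue
            mp4_path = os.path.join(root, file)
            cmd = [
                'python3', 'clip_object_tracker_bbox_only_separate_columns.py',
                '--weights', 'models/yolov7x.pt',
                '--conf', '0.5',
                '--save-txt',
                '--save-conf',
                '--name', os.path.splitext(file)[0],  # mp4 file name without extension
                '--source', mp4_path,                 # path to the mp4 file
                '--detection-engine', 'yolov7',
                '--device', '0',  # assumed addition: device 0 *within* the masked environment, i.e. the GPU chosen above
                '--info',
            ]
            print(f"Running on GPU {gpu_id}: {mp4_path}")
            subprocess.run(cmd, env=env)

# Example call with a placeholder path; substitute the actual video folder.
# find_and_process_mp4_files('/path/to/video_folder', gpu_id="1")

Passing env explicitly keeps the GPU assignment local to each launcher process instead of relying on whatever CUDA_VISIBLE_DEVICES happens to be exported in the shell that starts it.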