34 changes: 31 additions & 3 deletions docs/Camera.md
@@ -1,7 +1,7 @@
# Camera
Updated: Jan 21, 2024
Updated: May 2, 2025
---
**Current state:** Currently have a working raw camera node, need to fix the compressed image portion, and the launch file.
**Current state:** a raw camera node that publishes image data from a specific camera port, and a camera node that provides the DetectObjects service for any currently running camera

### How does the camera package work?
This package has the following dependencies:
@@ -11,4 +11,32 @@ This package has the following dependencies:
- cvbridge
- sensor_msgs.msg

This package basically creates an `/image` topic which can be subscribed to using RVIZ2.
### Overview

**camera_node_raw**

publishes images to a topic corresponding to a specific camera

**camera_node**

pulls image data from all running camera_node_raw instances and provides the DetectObjects service for any one of them


### ROS Parameters (IMPORTANT!!)
These parameters can be passed at runtime:

**camera_node_raw**

- `camera_port`: integer, the /dev/video device to read the feed from
- `fps`: integer, framerate
- `camera_id`: string, one of a fixed set of camera ids. Each id is mapped to its own topic. In DetectObjects.srv, this is passed in the request.camera_id field, which ensures a consistent API is exposed to client nodes requesting the DetectObjects service for a specific camera port. Possible values (subject to change): front, rear, top, bottom

**camera_node**

- `display_output`: bool, decides whether to publish the annotated image to the camera-specific image_annotated topic
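
For reference, here is a minimal launch sketch that starts one `camera_node_raw` per camera plus the detection node. The package and executable names come from `setup.py`; the ports, node names, and file name are illustrative assumptions, so adjust them to your hardware:

```python
# cameras.launch.py — a minimal sketch; ports and node names are assumptions
from launch import LaunchDescription
from launch_ros.actions import Node


def generate_launch_description():
    return LaunchDescription([
        # one raw publisher per physical camera
        Node(
            package='chimera_camera',
            executable='camera_node_raw',
            name='front_camera',
            parameters=[{'camera_port': 0, 'fps': 30, 'camera_id': 'front'}],
        ),
        Node(
            package='chimera_camera',
            executable='camera_node_raw',
            name='rear_camera',
            parameters=[{'camera_port': 2, 'fps': 30, 'camera_id': 'rear'}],
        ),
        # the detection node, publishing annotated frames when display_output is set
        Node(
            package='chimera_camera',
            executable='camera_node',
            parameters=[{'display_output': True}],
        ),
    ])
```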

### Troubleshooting

- Make sure the correct custom_interfaces are sourced; the DetectObjects service has a request field that needs to be filled! See the client sketch below.

- camera_node only publishes when the service is called and the proper parameters are passed, so the fps of the image_annotated topics is limited by the frequency of requests
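
For reference, here is a minimal client sketch that fills the `camera_id` request field and calls the service. The service name `detect_objects` is an assumption; check the name passed to `create_service` in `CameraPublisher.py` before using it:

```python
# a minimal client sketch; the service name 'detect_objects' is an assumption
import rclpy
from rclpy.node import Node
from custom_interfaces.srv import DetectObjects


class DetectObjectsClient(Node):
    def __init__(self):
        super().__init__('detect_objects_client')
        self.client = self.create_client(DetectObjects, 'detect_objects')
        while not self.client.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('Waiting for DetectObjects service...')

    def send_request(self, camera_id: str):
        request = DetectObjects.Request()
        request.camera_id = camera_id  # must be one of: front, rear, top, bottom
        future = self.client.call_async(request)
        rclpy.spin_until_future_complete(self, future)
        return future.result()


def main():
    rclpy.init()
    client = DetectObjectsClient()
    response = client.send_request('front')
    client.get_logger().info(f'Detected: {response.object_names}')
    client.destroy_node()
    rclpy.shutdown()


if __name__ == '__main__':
    main()
```

Calling `send_request` on a timer is one way to raise the effective fps of the `image_annotated` topics, per the note above.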
169 changes: 104 additions & 65 deletions src/chimera_camera/chimera_camera/CameraPublisher.py
@@ -2,13 +2,12 @@
from custom_interfaces.srv import DetectObjects
from ultralytics import YOLO
from rclpy.node import Node
#from cv_bridge import CvBridge
from sensor_msgs.msg import CompressedImage
from cv_bridge import CvBridge, CvBridgeError
import cv2
import numpy as np


CAMERA_PORT = 0

class Camera(Node):
def __init__(self):
super().__init__('CameraPublisher')
@@ -21,39 +20,73 @@ def __init__(self):
self.model = YOLO("../models/yolo11m.pt")

# Initiate the CvBridge to convert OpenCV images into ROS images
#self.bridge = CvBridge()

# Opens up the camera port to be read from
self.cam_feed = cv2.VideoCapture(CAMERA_PORT)

# If camera does not open, shut down the node because it is useless
if not self.cam_feed.isOpened():
self.get_logger().error("Failed to open camera")
rclpy.shutdown()
return

# OpenCV keeps a buffer of frames, we only need the most recent frame
self.cam_feed.set(cv2.CAP_PROP_BUFFERSIZE, 1)

self.get_logger().info("Camera Node Successfully Initialized")



def cleanup(self):
self.get_logger().info("Cleaning up resources...")
if self.cam_feed.isOpened():
self.cam_feed.release()
cv2.destroyAllWindows()
self.bridge = CvBridge()

# parameter for displaying output
self.declare_parameter('display_output', False)
self.display_output = self.get_parameter('display_output').get_parameter_value().bool_value

# valid camera topic names for camera publishers, subscribers, and service requests
self.sub_map = {
"front": "/image/front/image_raw",
"rear": "/image/rear/image_raw",
"top": "/image/top/image_raw",
"bottom": "/image/bottom/image_raw"
}

self.pub_map = {
"front": "/image/front/image_annotated",
"rear": "/image/rear/image_annotated",
"top": "/image/top/image_annotated",
"bottom": "/image/bottom/image_annotated"
}

# save all images from subscribed camera topics
self.image_cache = {}

# save publishers for each camera topic
self.pubs = {}

# find all valid running cameras
camera_topics = [n for n, t in self.get_topic_names_and_types() if n in self.sub_map.values()]

# does not read from /dev/ to prevent conflicts with cameraCompressed
# subscribes to all camera topics according to convention
for cam_id in self.sub_map.keys():
if self.sub_map[cam_id] in camera_topics:
self.create_subscription(CompressedImage, self.sub_map[cam_id], self.create_callback(self.sub_map[cam_id]), 10)
p = self.create_publisher(CompressedImage, self.pub_map[cam_id], 10)
self.pubs[cam_id] = p
if not camera_topics:
self.get_logger().error('No active camera topics found!')
else:
self.get_logger().info(f"Detect Objects Service Successfully Initialized, subscribed to {camera_topics}")

# callback retains access to topic to cache the returned msg
def create_callback(self, topic):
def callback(msg):
self.image_cache[topic] = msg
return callback

def detect_objects(self, request, response):

unused = self.cam_feed.read() # Clear buffer
ret, frame = self.cam_feed.read() # Get the latest frame
# receive requested camera id in the request.camera_id field of DetectObjects
camera_id = request.camera_id

if not ret:
self.get_logger().error("Could not read camera")
camera_topic = self.sub_map.get(camera_id)

if not camera_topic:
self.get_logger().error(f"Invalid camera_id: {camera_id}. Select from 'front, rear, top, bottom")
return response
elif camera_topic not in self.image_cache:
self.get_logger().error(f"Unavailable camera_id: {camera_id}")
return response

# retrieve latest frame from image cache
msg = self.image_cache[camera_topic]
np_arr = np.frombuffer(msg.data, np.uint8)
frame = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

# Perform object detection on the frame using YOLO11m
results = self.model(frame)

@@ -64,46 +97,55 @@ def detect_objects(self, request, response):
top_left = []
bottom_right = []
# Draw bounding boxes and labels
for detection in results[0].boxes:

# Get the coords for the bounding box, confidence scores, and labels. The .cpu().numpy() moves the tensors to cpu memory where they can
#be converted to numpy arrays. Tensors can be converted to numpy in the GPU.

xyxy = detection.xyxy.cpu().numpy()[0]
confidence = detection.conf.cpu().numpy()[0]
label = self.model.names[int(detection.cls.cpu().numpy()[0])]
for result in results:
for detection in result.boxes:

# Get the coords for the bounding box, confidence scores, and labels. The .cpu().numpy() moves the tensors to CPU memory where they can
# be converted to numpy arrays. Tensors cannot be converted to numpy on the GPU.

xyxy = detection.xyxy.cpu().numpy()[0]
confidence = detection.conf.cpu().numpy()[0]
label = self.model.names[int(detection.cls.cpu().numpy()[0])]


start_point = (int(xyxy[0]), int(xyxy[1])) # Top-left corner
end_point = (int(xyxy[2]), int(xyxy[3])) # Bottom-right corner
cv2.rectangle(frame, start_point, end_point, (0, 255, 0), 2) # Green box with thickness 2

start_point = (int(xyxy[0]), int(xyxy[1])) # Top-left corner
end_point = (int(xyxy[2]), int(xyxy[3])) # Bottom-right corner
cv2.rectangle(frame, start_point, end_point, (0, 255, 0), 2) # Green box with thickness 2

# Put the label and confidence on the frame
text = f"{label} {confidence:.2f}"
cv2.putText(frame, text, (int(xyxy[0]), int(xyxy[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Put the label and confidence on the frame
text = f"{label} {confidence:.2f}"
cv2.putText(frame, text, (int(xyxy[0]), int(xyxy[1]) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Calculate center of bounding box
x_center = (xyxy[0] + xyxy[2]) / 2
y_center = (xyxy[1] + xyxy[3]) / 2

# Calculate center of bounding box
x_center = (xyxy[0] + xyxy[2]) / 2
y_center = (xyxy[1] + xyxy[3]) / 2
# Store detection data for response
if float(confidence) > 0.75:
top_left.extend([float(xyxy[0]), float(xyxy[1])])
bottom_right.extend([float(xyxy[2]), float(xyxy[3])])
object_names.append(label)
object_positions_x.append(float(x_center))
object_positions_y.append(float(y_center))
confidences.append(float(confidence))

# Store detection data for response
if float(confidence) > 0.75:
top_left.extend([float(xyxy[0]), float(xyxy[1])])
bottom_right.extend([float(xyxy[2]), float(xyxy[3])])
object_names.append(label)
object_positions_x.append(float(x_center))
object_positions_y.append(float(y_center))
confidences.append(float(confidence))

if self.display_output:
# Resize the image
img_resized = cv2.resize(frame, (640, 480)) # Adjust the size as needed

# Convert the OpenCV image to a ROS compressed image message
try:
image_msg = self.bridge.cv2_to_compressed_imgmsg(img_resized, dst_format='jpeg') # Use JPEG compression
except CvBridgeError as e:
self.get_logger().error(str(e))
self.destroy_node()
return response

# Publish the compressed ROS image message
self.pubs[camera_id].publish(image_msg)

''' Uncomment if you want to see bounding boxes
# Display the resulting frame with bounding boxes
cv2.imshow('Webcam Feed with Detections', frame)

# Wait for a key press to keep the window responsive
cv2.waitKey(0)
'''
# Populate the response
response.object_names = object_names
response.object_positions_x = object_positions_x
@@ -114,9 +156,6 @@ def detect_objects(self, request, response):

return response

def __del__(self):
self.cleanup() # Ensure cleanup if the object is deleted

def main(args=None):
rclpy.init(args=args)
image = Camera()
81 changes: 66 additions & 15 deletions src/chimera_camera/chimera_camera/cameraCompressed.py
@@ -11,14 +11,60 @@
import cv2

class Camera(Node):
def __init__(self, cameraport, topic):
super().__init__('CameraPublisher')
self.publisher = self.create_publisher(CompressedImage, topic, 10)
def __init__(self):
super().__init__('CameraPublisherRaw')

self.declare_parameter('camera_id', 'front')
CAMERA_ID = self.get_parameter('camera_id').get_parameter_value().string_value

# valid camera topic names for camera publishers, subscribers, and service requests
self.topic_map = {
"front": "/image/front/image_raw",
"rear": "/image/rear/image_raw",
"top": "/image/top/image_raw",
"bottom": "/image/bottom/image_raw"
}
camera_topic = self.topic_map.get(CAMERA_ID)
if camera_topic is None:
self.get_logger().error(f"Invalid camera_id: {CAMERA_ID}. Select from: front, rear, top, bottom")
rclpy.shutdown()
return
self.get_logger().info(f'Camera id is set to: {CAMERA_ID}')

# publisher based on camera id given in launch file
self.publisher = self.create_publisher(CompressedImage, camera_topic, 10)

# initialize camera to /dev/video0 by default; configurable through launch files
# the idea is to launch this node once per submarine camera
self.declare_parameter('camera_port', 0)
CAMERA_PORT = self.get_parameter('camera_port').get_parameter_value().integer_value
self.get_logger().info(f'Camera port is set to: {CAMERA_PORT}')

self.bridge = CvBridge()
self.cam_feed = cv2.VideoCapture(cameraport)
self.cam_feed = cv2.VideoCapture(CAMERA_PORT)

# If camera does not open, shut down the node because it is useless
if not self.cam_feed.isOpened():
self.get_logger().error("Failed to open camera")
rclpy.shutdown()
return

# OpenCV keeps a buffer of frames, we only need the most recent frame
self.cam_feed.set(cv2.CAP_PROP_BUFFERSIZE, 1)

def compress_and_publish_image(self, topic='image_compressed'):
self.get_logger().info("Camera Node Successfully Initialized")

# fps parameter sets the period of the publish timer
self.declare_parameter('fps', 30)
fps = self.get_parameter('fps').get_parameter_value().integer_value

TICK_LENGTH = 1 / fps

self.timer = self.create_timer(TICK_LENGTH, self.compress_and_publish_image)

# the topic to publish to is derived from the camera_id parameter assigned in the launch file
def compress_and_publish_image(self):
ret, img = self.cam_feed.read()

if not ret:
self.get_logger().error("Could not read camera")
return

# Resize the image
img_resized = cv2.resize(img, (640, 480)) # Adjust the size as needed
@@ -28,25 +74,30 @@ def compress_and_publish_image(self, topic='image_compressed'):
image_msg = self.bridge.cv2_to_compressed_imgmsg(img_resized, dst_format='jpeg') # Use JPEG compression
except CvBridgeError as e:
self.get_logger().error(str(e))
return -1
self.destroy_node()
return

# Publish the compressed ROS image message
self.publisher.publish(image_msg)

def cleanup(self):
self.get_logger().info("Cleaning up resources...")
if self.cam_feed.isOpened():
self.cam_feed.release()
cv2.destroyAllWindows()

def __del__(self):
self.cleanup() # Ensure cleanup if the object is deleted

def main(args=None):
rclpy.init(args=args)
image = Camera(cameraport=0, topic='image')
image = Camera()

# Use image_transport for publishing compressed images
image_transport = image.create_publisher(CompressedImage, 'image', 10)
#image_transport = image.create_publisher(CompressedImage, 'image', 10)

while True:
if image.compress_and_publish_image() == -1:
break
if (cv2.waitKey(1) & 0xFF == ord("q")) or (cv2.waitKey(1) == 27):
image.destroy_node()
rclpy.shutdown()
break
rclpy.spin(image)
image.destroy_node()
rclpy.shutdown()

if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions src/chimera_camera/setup.py
@@ -32,6 +32,7 @@
'console_scripts': [
'camera_node = chimera_camera.CameraPublisher:main',
'client = chimera_camera.CameraPublisherTest:main',
'camera_node_raw = chimera_camera.cameraCompressed:main'
],
},

3 changes: 2 additions & 1 deletion src/custom_interfaces/srv/DetectObjects.srv
@@ -1,6 +1,7 @@
# DetectObjects.srv

# Request: No input required
# Request: which camera the service should read from (front, rear, top, bottom)
string camera_id
---

# Response: Detected object names, positions, and confidence scores