
How to align point clouds in Unity? #6210


Description

@xiaolijz

I obtain RGB-D images and camera poses from Unity, convert the RGB-D data into point clouds, and try to align them in a common coordinate system to reconstruct the 3D scene. However, the result is incorrect: the point clouds are not properly aligned, and I can't figure out how to solve the problem.
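
Concretely, what I am trying to do for each frame is a standard pinhole back-projection into the camera frame followed by a camera-to-world transform, roughly as in the minimal sketch below (`fx, fy, cx, cy` are the intrinsics and `pose_c2w` is the 4x4 camera-to-world matrix exported from Unity; this is only the math I am aiming for, not the full script):

```python
import numpy as np

def backproject_and_transform(depth, pose_c2w, fx, fy, cx, cy):
    """Back-project a depth map and move the points into world coordinates.

    depth    : (H, W) array of metric depth along the camera Z axis
    pose_c2w : 4x4 camera-to-world matrix
    """
    h, w = depth.shape
    u, v = np.meshgrid(np.arange(w), np.arange(h))
    # Pinhole back-projection into the camera frame
    x = (u - cx) * depth / fx
    y = (v - cy) * depth / fy
    pts_cam = np.stack((x, y, depth), axis=-1).reshape(-1, 3)
    # Homogeneous transform into the world frame
    pts_hom = np.hstack((pts_cam, np.ones((pts_cam.shape[0], 1))))
    return (pose_c2w @ pts_hom.T).T[:, :3]
```
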

The code below is for capturing the RGB-D images and camera poses:
```csharp
using UnityEngine;
using UnityEngine.Rendering.Universal;
using System.Collections;
using System.IO;

public class RGBDCapture : MonoBehaviour
{
public Camera cam;
private UniversalAdditionalCameraData cameraData;

private RenderTexture rgbRT;
private RenderTexture depthRT;

private int width = 640;
private int height = 480;

public int totalFrames = 10;       // number of frames to capture
public float interval = 1.5f;      // seconds between each frame
private int currentFrame = 0;

private bool isCapturing = false;

void Start()
{
    if (cam == null)
    {
        Debug.LogError("Camera not assigned!");
        return;
    }

    cameraData = cam.GetComponent<UniversalAdditionalCameraData>();
    if (cameraData == null)
    {
        Debug.LogWarning("No UniversalAdditionalCameraData found on camera. Adding one.");
        cameraData = cam.gameObject.AddComponent<UniversalAdditionalCameraData>();
    }

    rgbRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
    depthRT = new RenderTexture(width, height, 24, RenderTextureFormat.ARGB32);
}

[ContextMenu("Start RGB-D Capture Sequence")]
public void StartCaptureSequence()
{
    if (!isCapturing)
    {
        currentFrame = 0;
        StartCoroutine(CaptureSequence());
    }
}

IEnumerator CaptureSequence()
{
    yield return new WaitForEndOfFrame();
    isCapturing = true;

    while (currentFrame < totalFrames)
    {
        string rgbName = $"rgb_{currentFrame:D4}.png";
        string depthName = $"depth_{currentFrame:D4}.exr";
        string poseName = $"pose_{currentFrame:D4}.txt";

        // Save the camera pose first (before capturing the images)
        SaveCameraPose(poseName);

        // --- Capture RGB ---
        cameraData.SetRenderer(0); // RGB Renderer
        cam.targetTexture = rgbRT;
        cam.Render();
        SaveRenderTextureToPNG(rgbRT, rgbName);

        // --- Capture Depth ---
        cameraData.SetRenderer(1); // Depth Renderer
        cam.targetTexture = depthRT;
        cam.Render();
        SaveRenderTextureToEXR(depthRT, depthName);

        cam.targetTexture = null;
        Debug.Log($"Captured frame {currentFrame}");

        currentFrame++;
        yield return new WaitForSeconds(interval);
    }

    isCapturing = false;
    Debug.Log("RGB-D sequence capture complete.");
}

void SaveRenderTextureToPNG(RenderTexture rt, string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);
    RenderTexture.active = rt;
    Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGB24, false);
    tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
    tex.Apply();

    byte[] bytes = tex.EncodeToPNG();
    File.WriteAllBytes(path, bytes);

    RenderTexture.active = null;
}

void SaveRenderTextureToEXR(RenderTexture rt, string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);

    RenderTexture.active = rt;

    Texture2D tex = new Texture2D(rt.width, rt.height, TextureFormat.RGBAFloat, false);
    tex.ReadPixels(new Rect(0, 0, rt.width, rt.height), 0, 0);
    tex.Apply();

    byte[] bytes = tex.EncodeToEXR(Texture2D.EXRFlags.OutputAsFloat);
    File.WriteAllBytes(path, bytes);

    RenderTexture.active = null;
}

void SaveCameraPose(string fileName)
{
    string folder = Application.dataPath + "/Captured/";
    if (!Directory.Exists(folder))
        Directory.CreateDirectory(folder);

    string path = Path.Combine(folder, fileName);

    // Get the camera pose (a 4x4 matrix in world coordinates)
    // Matrix4x4 m = cam.transform.localToWorldMatrix;
    Matrix4x4 m = Matrix4x4.TRS(cam.transform.position, cam.transform.rotation, Vector3.one);
    using (StreamWriter writer = new StreamWriter(path))
    {
        for (int i = 0; i < 4; i++)
        {
            string line = $"{m[i, 0]} {m[i, 1]} {m[i, 2]} {m[i, 3]}";
            writer.WriteLine(line);
        }
    }
}

}
```
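
One thing I am unsure about is what the depth renderer actually writes into the EXR. If it stores the raw [0, 1] depth-buffer value rather than linear eye-space depth, I assume it would have to be linearized with the camera near/far planes before back-projection, along the lines of the sketch below (a hypothetical helper; the formula is for a conventional, non-reversed depth buffer, and the `near`/`far` defaults are placeholders):

```python
import numpy as np

def linearize_depth(d, near=0.3, far=1000.0):
    """Map a non-linear [0, 1] depth-buffer value to linear eye-space depth,
    assuming a standard perspective projection and a conventional
    (non-reversed) Z buffer. With reversed-Z, substitute (1 - d) first."""
    d = np.asarray(d, dtype=np.float32)
    return near * far / (far - d * (far - near))
```
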
The code below converts the RGB-D frames to point clouds and aligns them:

```python
import os
import cv2
import numpy as np
import open3d as o3d
import OpenEXR
import Imath

def read_exr_depth(exr_path):
    exr_file = OpenEXR.InputFile(exr_path)
    dw = exr_file.header()['dataWindow']
    width = dw.max.x - dw.min.x + 1
    height = dw.max.y - dw.min.y + 1
    pt = Imath.PixelType(Imath.PixelType.FLOAT)
    depth_str = exr_file.channel('R', pt)
    depth = np.frombuffer(depth_str, dtype=np.float32).reshape((height, width))
    return depth

def unity_to_opencv_coords(points):
    # Flip Y: Unity camera is X right, Y up, Z forward (left-handed);
    # OpenCV camera is X right, Y down, Z forward (right-handed).
    points[:, 1] *= -1
    return points

def convert_unity_pose_to_opencv(pose_unity):
    # Change of basis: conjugate the Unity pose by the axis-flip matrix.
    R_unity_to_cv = np.diag([1, -1, -1])
    T = np.eye(4)
    T[:3, :3] = R_unity_to_cv

    pose_cv = T @ pose_unity @ np.linalg.inv(T)
    return pose_cv

def load_pose(path):
    return np.loadtxt(path)

def create_pointcloud(rgb_img, depth_img, fx, fy, cx, cy):
    h, w = depth_img.shape
    i, j = np.meshgrid(np.arange(w), np.arange(h))
    z = depth_img.copy()
    z[z >= 0.99] = 0  # zero out values near 1.0 (background / far plane)

    x = (i - cx) * z / fx
    y = (j - cy) * z / fy

    points = np.stack((x, y, z), axis=-1).reshape(-1, 3)
    colors = rgb_img[:, :, :3].reshape(-1, 3) / 255.0  # normalize colors to [0, 1]
    return points, colors

def transform_points(points, pose):
    points_hom = np.hstack((points, np.ones((points.shape[0], 1))))
    # flip_z = np.diag([1, 1, -1])
    # pose[:3, :3] = flip_z @ pose[:3, :3]
    # T = np.eye(4)
    # T[:3, :3] = np.eye(3)
    # T[:3, 3] = np.array([500, 0, 500])
    # pose = np.linalg.inv(T) @ pose
    points_world = (pose @ points_hom.T).T[:, :3]
    return points_world

fx, fy, cx, cy = 1598.515, 865.4121, 320, 240

data_dir = "/home/jimmy/task/Unity_Projects/Terrain/Assets/Captured"
num_frames = 10

all_points = []
all_colors = []
pose_lists = []
for i in range(num_frames):
    rgb_path = os.path.join(data_dir, f"rgb_{i:04d}.png")
    depth_path = os.path.join(data_dir, f"depth_{i:04d}.exr")
    pose_path = os.path.join(data_dir, f"pose_{i:04d}.txt")

    rgb = cv2.imread(rgb_path, cv2.IMREAD_UNCHANGED)[:, :, ::-1]  # BGR -> RGB
    depth = read_exr_depth(depth_path)
    pose = load_pose(pose_path)
    # pose_centered = convert_unity_pose_to_opencv(pose)
    # print(f"pose = {pose}")
    T_center = np.zeros(3)
    # T_center[:3, 3] = -pose[:3, 3]
    pose[:3, 3] = T_center
    pose_rotation = pose[:3, :3]

    pose[:3, :3] = pose_rotation
    print(f"pose = {pose}")
    points, colors = create_pointcloud(rgb, depth, fx, fy, cx, cy)
    points = unity_to_opencv_coords(points)
    points_world = transform_points(points, pose)
    coord_pose = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)
    coord_pose.transform(pose)
    all_points.append(points_world)
    # all_points.append(points)
    all_colors.append(colors)
    pose_lists.append(coord_pose)

all_points = np.vstack(all_points)
all_colors = np.vstack(all_colors)

coord = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1)

coord.transform(pose)

pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(all_points)
pcd.colors = o3d.utility.Vector3dVector(all_colors)
o3d.visualization.draw_geometries([*pose_lists])
o3d.visualization.draw_geometries([pcd, coord])

```
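
For reference, this is the consistent handedness conversion I think should be applied to both the points and the pose (a sketch of one possible convention based on my `unity_to_opencv_coords` / `convert_unity_pose_to_opencv` above; I have not verified it is the right one):

```python
import numpy as np

# Unity's camera frame is left-handed (X right, Y up, Z forward); an
# OpenCV-style camera frame is right-handed (X right, Y down, Z forward).
# Flipping the Y axis maps one onto the other; S is its own inverse.
S = np.diag([1.0, -1.0, 1.0, 1.0])

def points_unity_to_cv(points):
    """Flip Y on (N, 3) points expressed in the Unity camera frame."""
    out = points.copy()
    out[:, 1] *= -1.0
    return out

def pose_unity_to_cv(pose_unity):
    """Apply the same axis flip to a 4x4 camera-to-world pose by conjugation,
    so the camera-frame and world-frame axes stay consistent."""
    return S @ pose_unity @ S
```
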
The result:

[Image]

and the real scene:

[Image]

Looking forward to your reply.
