From 46e7d63df32da62dd419a59ae157b2728b491169 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 13:24:43 -0800 Subject: [PATCH 01/22] Switched from using OpenMP for parallelism to using oneAPI::TBB (though not using the oneapi scope). Untested but building. --- .../LegacyReconstructionUtil.h | 115 ++++-- cpp/open3d/core/ParallelFor.h | 17 +- .../core/hashmap/CPU/CPUHashBackendBuffer.cpp | 13 +- cpp/open3d/core/hashmap/CPU/TBBHashBackend.h | 77 ++-- cpp/open3d/core/kernel/NonZeroCPU.cpp | 24 +- cpp/open3d/core/kernel/ReductionCPU.cpp | 102 ++--- cpp/open3d/geometry/EstimateNormals.cpp | 82 ++-- cpp/open3d/geometry/ISSKeypoints.cpp | 81 ++-- cpp/open3d/geometry/Image.cpp | 218 +++++------ cpp/open3d/geometry/PointCloud.cpp | 172 +++++---- cpp/open3d/geometry/PointCloudCluster.cpp | 28 +- .../PointCloudPlanarPatchDetection.cpp | 19 +- .../geometry/PointCloudSegmentation.cpp | 108 +++--- cpp/open3d/geometry/TriangleMesh.cpp | 167 ++++---- .../geometry/TriangleMeshDeformation.cpp | 127 ++++--- .../sensor/azure_kinect/AzureKinectSensor.cpp | 26 +- .../pipelines/color_map/ColorMapUtils.cpp | 235 ++++++------ .../pipelines/color_map/NonRigidOptimizer.cpp | 237 +++++++----- .../pipelines/color_map/RigidOptimizer.cpp | 94 ++--- .../integration/UniformTSDFVolume.cpp | 279 +++++++------- cpp/open3d/pipelines/odometry/Odometry.cpp | 327 ++++++++-------- .../registration/FastGlobalRegistration.cpp | 59 +-- cpp/open3d/pipelines/registration/Feature.cpp | 161 ++++---- .../pipelines/registration/GeneralizedICP.cpp | 16 +- .../pipelines/registration/Registration.cpp | 357 +++++++++++------- .../pipelines/registration/Registration.h | 5 + cpp/open3d/t/geometry/PointCloud.cpp | 4 +- .../t/geometry/kernel/PointCloudCPU.cpp | 13 +- cpp/open3d/t/geometry/kernel/UVUnwrapping.cpp | 3 + .../t/geometry/kernel/VoxelBlockGridImpl.h | 20 +- cpp/open3d/t/io/sensor/RGBDVideoReader.cpp | 15 +- .../pipelines/kernel/FillInLinearSystemImpl.h | 56 ++- .../t/pipelines/kernel/RGBDOdometryCPU.cpp | 338 ++++++++--------- .../t/pipelines/kernel/RegistrationCPU.cpp | 232 ++++++------ .../t/pipelines/registration/Feature.cpp | 26 +- cpp/open3d/utility/Eigen.cpp | 172 +++++---- cpp/open3d/utility/Parallel.cpp | 15 +- cpp/open3d/utility/Parallel.h | 12 +- cpp/open3d/utility/ProgressBar.cpp | 25 ++ cpp/open3d/utility/ProgressBar.h | 14 + cpp/open3d/utility/Random.cpp | 14 +- cpp/open3d/utility/Random.h | 18 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/utility/ProgressBar.cpp | 17 + examples/cpp/CMakeLists.txt | 12 +- examples/cpp/OpenMP.cpp | 2 +- examples/cpp/RealSenseBagReader.cpp | 15 +- examples/cpp/TrimMeshBasedOnPointCloud.cpp | 28 +- 48 files changed, 2320 insertions(+), 1878 deletions(-) diff --git a/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h b/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h index 26d30bfe182..4070e808e39 100644 --- a/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h +++ b/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h @@ -8,6 +8,8 @@ #pragma once #include +#include +#include #include "DebugUtil.h" #include "FileSystemUtil.h" @@ -547,12 +549,15 @@ class ReconstructionPipeline { const size_t num_pairs = fragment_matching_results.size(); if (config_["multi_threading"].asBool()) { -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)num_pairs; i++) { - RegisterFragmentPair(ply_files, fragment_matching_results[i].s_, - fragment_matching_results[i].t_, - fragment_matching_results[i]); - } + tbb::parallel_for(tbb::blocked_range(0, num_pairs), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + RegisterFragmentPair(ply_files, + fragment_matching_results[i].s_, + fragment_matching_results[i].t_, + fragment_matching_results[i]); + } + }); } else { for (size_t i = 0; i < num_pairs; i++) { RegisterFragmentPair(ply_files, fragment_matching_results[i].s_, @@ -635,13 +640,16 @@ class ReconstructionPipeline { } if (config_["multi_threading"].asBool()) { -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)fragment_matching_results.size(); i++) { - const int s = fragment_matching_results[i].s_; - const int t = fragment_matching_results[i].t_; - RefineFragmentPair(ply_files, s, t, - fragment_matching_results[i]); - } + tbb::parallel_for(tbb::blocked_range(0, + fragment_matching_results.size()), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const int s = fragment_matching_results[i].s_; + const int t = fragment_matching_results[i].t_; + RefineFragmentPair(ply_files, s, t, + fragment_matching_results[i]); + } + }); } else { for (size_t i = 0; i < fragment_matching_results.size(); i++) { const int s = fragment_matching_results[i].s_; @@ -722,37 +730,70 @@ class ReconstructionPipeline { matched_result.information_ = std::get<1>(result); } + struct PointCloudIntegrator { + // Globals + ReconstructionPipeline& pipeline; + const int fragment_id; + const std::vector& color_files; + const std::vector& depth_files; + const camera::PinholeCameraIntrinsic& intrinsic; + using PoseGraphT = pipelines::registration::PoseGraph; + const PoseGraphT& pose_graph; + // Locals + geometry::PointCloud fragment; + + PointCloudIntegrator(ReconstructionPipeline& pipeline_, + const int fragment_id_, + const std::vector& color_files_, + const std::vector& depth_files_, + const camera::PinholeCameraIntrinsic& intrinsic_, + const PoseGraphT& pose_graph_) + : pipeline(pipeline_), fragment_id(fragment_id_), + color_files(color_files_), depth_files(depth_files_), + intrinsic(intrinsic_), pose_graph(pose_graph_) {} + + PointCloudIntegrator(PointCloudIntegrator& o, tbb::split) + : pipeline(o.pipeline), fragment_id(o.fragment_id), + color_files(o.color_files), depth_files(o.depth_files), + intrinsic(o.intrinsic), pose_graph(o.pose_graph) {} + + void operator()(const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const int i_abs = fragment_id * + pipeline.config_["n_frames_per_fragment"].asInt() + i; + utility::LogInfo("Fragment {:03d} / {:03d} :: " + "Integrate rgbd frame {:d} ({:d} of {:d}).", + fragment_id, pipeline.n_fragments_ - 1, i_abs, + i + 1, pose_graph.nodes_.size()); + const geometry::RGBDImage rgbd = pipeline.ReadRGBDImage( + color_files[i_abs], depth_files[i_abs], false); + auto pcd = geometry::PointCloud::CreateFromRGBDImage( + rgbd, intrinsic, Eigen::Matrix4d::Identity(), true); + pcd->Transform(pose_graph.nodes_[i].pose_); + } + } + + void join(PointCloudIntegrator& rhs) { + fragment += rhs.fragment; + } + }; + void IntegrateFragmentRGBD( int fragment_id, const std::vector& color_files, const std::vector& depth_files, const camera::PinholeCameraIntrinsic& intrinsic, const pipelines::registration::PoseGraph& pose_graph) { - geometry::PointCloud fragment; - const size_t graph_num = pose_graph.nodes_.size(); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(graph_num); ++i) { - const int i_abs = - fragment_id * config_["n_frames_per_fragment"].asInt() + i; - utility::LogInfo( - "Fragment {:03d} / {:03d} :: Integrate rgbd frame {:d} " - "({:d} " - "of " - "{:d}).", - fragment_id, n_fragments_ - 1, i_abs, i + 1, graph_num); - const geometry::RGBDImage rgbd = ReadRGBDImage( - color_files[i_abs], depth_files[i_abs], false); - auto pcd = geometry::PointCloud::CreateFromRGBDImage( - rgbd, intrinsic, Eigen::Matrix4d::Identity(), true); - pcd->Transform(pose_graph.nodes_[i].pose_); -#pragma omp critical - { fragment += *pcd; } - } - - const geometry::PointCloud fragment_down = *fragment.VoxelDownSample( - config_["tsdf_cubic_size"].asDouble() / 512.0); + const size_t graph_num = pose_graph.nodes_.size(); + PointCloudIntegrator reducer(*this, fragment_id, color_files, + depth_files, intrinsic, pose_graph); + tbb::parallel_reduce(tbb::blocked_range( + 0, graph_num), reducer); + + const geometry::PointCloud fragment_down = + *reducer.fragment.VoxelDownSample( + config_["tsdf_cubic_size"].asDouble() / 512.0); io::WritePointCloud( utility::filesystem::JoinPath( config_["path_dataset"].asString(), diff --git a/cpp/open3d/core/ParallelFor.h b/cpp/open3d/core/ParallelFor.h index d9bbe376684..ba8104fda89 100644 --- a/cpp/open3d/core/ParallelFor.h +++ b/cpp/open3d/core/ParallelFor.h @@ -21,6 +21,8 @@ #include #include "open3d/core/CUDAUtils.h" +#else +#include #endif namespace open3d { @@ -79,10 +81,17 @@ void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) { return; } -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < n; ++i) { - func(i); - } +// #pragma omp parallel for num_threads(utility::EstimateMaxThreads()) +// for (int64_t i = 0; i < n; ++i) { +// func(i); +// } + + tbb::parallel_for(tbb::blocked_range(0, n, 32), + [&func](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); ++i) { + func(i); + } + }); } #endif diff --git a/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp b/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp index 831b65fd2cc..86a23c7e285 100644 --- a/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp +++ b/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp @@ -8,16 +8,21 @@ #include "open3d/core/hashmap/HashBackendBuffer.h" #include "open3d/utility/Parallel.h" +#include + namespace open3d { namespace core { void CPUResetHeap(Tensor& heap) { uint32_t* heap_ptr = heap.GetDataPtr(); int64_t capacity = heap.GetLength(); -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < capacity; ++i) { - heap_ptr[i] = i; - } + tbb::parallel_for(tbb::blocked_range( + 0, capacity, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); ++i) { + heap_ptr[i] = i; + } + }); }; } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h b/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h index f562dc2cc9d..b72ed98bf3d 100644 --- a/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h +++ b/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include @@ -93,15 +94,17 @@ void TBBHashBackend::Find(const void* input_keys, int64_t count) { const Key* input_keys_templated = static_cast(input_keys); -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < count; ++i) { - const Key& key = input_keys_templated[i]; + tbb::parallel_for(tbb::blocked_range(0, count, 64), + [=, &impl=impl_](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); ++i) { + const Key& key = input_keys_templated[i]; - auto iter = impl_->find(key); - bool flag = (iter != impl_->end()); - output_masks[i] = flag; - output_buf_indices[i] = flag ? iter->second : 0; - } + auto iter = impl->find(key); + bool flag = (iter != impl->end()); + output_masks[i] = flag; + output_buf_indices[i] = flag ? iter->second : 0; + } + }); } template @@ -177,43 +180,45 @@ void TBBHashBackend::Insert( size_t n_values = input_values_soa.size(); -#pragma omp parallel for num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < count; ++i) { - output_buf_indices[i] = 0; - output_masks[i] = false; + tbb::parallel_for(tbb::blocked_range(0, count, 64), + [&](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); ++i) { + output_buf_indices[i] = 0; + output_masks[i] = false; - const Key& key = input_keys_templated[i]; + const Key& key = input_keys_templated[i]; - // Try to insert a dummy buffer index. - auto res = impl_->insert({key, 0}); + // Try to insert a dummy buffer index. + auto res = impl_->insert({key, 0}); - // Lazy copy key value pair to buffer only if succeeded - if (res.second) { - buf_index_t buf_index = buffer_accessor_->DeviceAllocate(); - void* key_ptr = buffer_accessor_->GetKeyPtr(buf_index); + // Lazy copy key value pair to buffer only if succeeded + if (res.second) { + buf_index_t buf_index = buffer_accessor_->DeviceAllocate(); + void* key_ptr = buffer_accessor_->GetKeyPtr(buf_index); - // Copy templated key to buffer - *static_cast(key_ptr) = key; + // Copy templated key to buffer + *static_cast(key_ptr) = key; - // Copy/reset non-templated value in buffer - for (size_t j = 0; j < n_values; ++j) { - uint8_t* dst_value = static_cast( - buffer_accessor_->GetValuePtr(buf_index, j)); + // Copy/reset non-templated value in buffer + for (size_t j = 0; j < n_values; ++j) { + uint8_t* dst_value = static_cast( + buffer_accessor_->GetValuePtr(buf_index, j)); - const uint8_t* src_value = - static_cast(input_values_soa[j]) + - this->value_dsizes_[j] * i; - std::memcpy(dst_value, src_value, this->value_dsizes_[j]); - } + const uint8_t* src_value = + static_cast(input_values_soa[j]) + + this->value_dsizes_[j] * i; + std::memcpy(dst_value, src_value, this->value_dsizes_[j]); + } - // Update from dummy 0 - res.first->second = buf_index; + // Update from dummy 0 + res.first->second = buf_index; - // Write to return variables - output_buf_indices[i] = buf_index; - output_masks[i] = true; + // Write to return variables + output_buf_indices[i] = buf_index; + output_masks[i] = true; + } } - } + }); } template diff --git a/cpp/open3d/core/kernel/NonZeroCPU.cpp b/cpp/open3d/core/kernel/NonZeroCPU.cpp index 4bea5919154..67618aca5d0 100644 --- a/cpp/open3d/core/kernel/NonZeroCPU.cpp +++ b/cpp/open3d/core/kernel/NonZeroCPU.cpp @@ -12,6 +12,8 @@ #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" +#include + namespace open3d { namespace core { namespace kernel { @@ -46,17 +48,19 @@ Tensor NonZeroCPU(const Tensor& src) { std::vector> non_zero_indices_by_dimensions( num_dims, std::vector(num_non_zeros, 0)); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < static_cast(num_non_zeros); i++) { - int64_t non_zero_index = non_zero_indices[i]; - for (int64_t dim = num_dims - 1; dim >= 0; dim--) { - void* result_ptr = result_iter.GetPtr(dim * num_non_zeros + i); - OPEN3D_ASSERT(result_ptr != nullptr && "Internal error."); - *static_cast(result_ptr) = non_zero_index % shape[dim]; - non_zero_index = non_zero_index / shape[dim]; + tbb::parallel_for(tbb::blocked_range( + 0, num_non_zeros, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); i++) { + int64_t non_zero_index = non_zero_indices[i]; + for (int64_t dim = num_dims - 1; dim >= 0; dim--) { + void* result_ptr = result_iter.GetPtr(dim * num_non_zeros + i); + OPEN3D_ASSERT(result_ptr != nullptr && "Internal error."); + *static_cast(result_ptr) = non_zero_index % shape[dim]; + non_zero_index = non_zero_index / shape[dim]; + } } - } + }); return result; } diff --git a/cpp/open3d/core/kernel/ReductionCPU.cpp b/cpp/open3d/core/kernel/ReductionCPU.cpp index 7caa60b34f7..cf4c7f12e85 100644 --- a/cpp/open3d/core/kernel/ReductionCPU.cpp +++ b/cpp/open3d/core/kernel/ReductionCPU.cpp @@ -14,6 +14,9 @@ #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" +#include +#include + namespace open3d { namespace core { namespace kernel { @@ -76,9 +79,7 @@ class CPUReductionEngine { void Run(const func_t& reduce_func, scalar_t identity) { // See: PyTorch's TensorIterator::parallel_reduce for the reference // design of reduction strategy. - if (utility::EstimateMaxThreads() == 1 || utility::InParallel()) { - LaunchReductionKernelSerial(indexer_, reduce_func); - } else if (indexer_.NumOutputElements() <= 1) { + if (indexer_.NumOutputElements() <= 1) { LaunchReductionKernelTwoPass(indexer_, reduce_func, identity); } else { @@ -111,29 +112,19 @@ class CPUReductionEngine { "Internal error: two-pass reduction only works for " "single-output reduction ops."); } - int64_t num_workloads = indexer.NumWorkloads(); - int64_t num_threads = utility::EstimateMaxThreads(); - int64_t workload_per_thread = - (num_workloads + num_threads - 1) / num_threads; - std::vector thread_results(num_threads, identity); - -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { - int64_t start = thread_idx * workload_per_thread; - int64_t end = std::min(start + workload_per_thread, num_workloads); - for (int64_t workload_idx = start; workload_idx < end; - ++workload_idx) { + scalar_t& dst = *reinterpret_cast(indexer.GetOutputPtr(0)); + dst = tbb::parallel_reduce(tbb::blocked_range( + 0, indexer.NumWorkloads(), utility::DefaultGrainSizeTBB()), + identity, [&](const tbb::blocked_range& range, + scalar_t so_far){ + for (int64_t workload_idx = range.begin(); + workload_idx < range.end(); ++workload_idx) { scalar_t* src = reinterpret_cast( indexer.GetInputPtr(0, workload_idx)); - thread_results[thread_idx] = - element_kernel(*src, thread_results[thread_idx]); + so_far = element_kernel(*src, so_far); } - } - scalar_t* dst = reinterpret_cast(indexer.GetOutputPtr(0)); - for (int64_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { - *dst = element_kernel(thread_results[thread_idx], *dst); - } + return so_far; + }, element_kernel); } template @@ -164,13 +155,16 @@ class CPUReductionEngine { "LaunchReductionKernelTwoPass instead."); } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int64_t i = 0; i < indexer_shape[best_dim]; ++i) { - Indexer sub_indexer(indexer); - sub_indexer.ShrinkDim(best_dim, i, 1); - LaunchReductionKernelSerial(sub_indexer, element_kernel); - } + // TODO: could theoretically do inner reductions in parallel too with TBB + tbb::parallel_for(tbb::blocked_range( + 0, indexer_shape[best_dim], 1), + [&](const tbb::blocked_range& range){ + for (int64_t i = range.begin(); i < range.end(); ++i) { + Indexer sub_indexer(indexer); + sub_indexer.ShrinkDim(best_dim, i, 1); + LaunchReductionKernelSerial(sub_indexer, element_kernel); + } + }); } private: @@ -191,25 +185,37 @@ class CPUArgReductionEngine { // sub-iteration. int64_t num_output_elements = indexer_.NumOutputElements(); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int64_t output_idx = 0; output_idx < num_output_elements; - output_idx++) { - // sub_indexer.NumWorkloads() == ipo. - // sub_indexer's workload_idx is indexer_'s ipo_idx. - Indexer sub_indexer = indexer_.GetPerOutputIndexer(output_idx); - scalar_t dst_val = identity; - for (int64_t workload_idx = 0; - workload_idx < sub_indexer.NumWorkloads(); workload_idx++) { - int64_t src_idx = workload_idx; - scalar_t* src_val = reinterpret_cast( - sub_indexer.GetInputPtr(0, workload_idx)); - int64_t* dst_idx = reinterpret_cast( - sub_indexer.GetOutputPtr(0, workload_idx)); - std::tie(*dst_idx, dst_val) = - reduce_func(src_idx, *src_val, *dst_idx, dst_val); + tbb::parallel_for(tbb::blocked_range( + 0, num_output_elements, 1), + [&](const tbb::blocked_range& range){ + for (int64_t output_idx = range.begin(); + output_idx < range.end(); ++output_idx) { + // sub_indexer.NumWorkloads() == ipo. + // sub_indexer's workload_idx is indexer_'s ipo_idx. + Indexer sub_indexer = indexer_.GetPerOutputIndexer(output_idx); + using result_t = std::pair; + result_t val_idx{identity, identity}; + val_idx = tbb::parallel_deterministic_reduce( + tbb::blocked_range(0, sub_indexer.NumWorkloads(), + utility::DefaultGrainSizeTBB()), val_idx, + [&](const tbb::blocked_range& range, + result_t so_far){ + for (int64_t workload_idx = range.begin(); + workload_idx < range.end(); ++workload_idx) { + scalar_t& src_val = *reinterpret_cast( + sub_indexer.GetInputPtr(0, workload_idx)); + so_far = reduce_func(workload_idx, src_val, + std::get<0>(so_far), std::get<1>(so_far)); + } + return so_far; + }, [&reduce_func](result_t a, result_t b) { + return reduce_func(std::get<0>(a), std::get<1>(a), + std::get<0>(b), std::get<1>(b)); + }); + *reinterpret_cast(sub_indexer.GetOutputPtr(0)) = + std::get<1>(val_idx); } - } + }); } private: diff --git a/cpp/open3d/geometry/EstimateNormals.cpp b/cpp/open3d/geometry/EstimateNormals.cpp index 5bc9abc0ba3..3f1ddd6bda2 100644 --- a/cpp/open3d/geometry/EstimateNormals.cpp +++ b/cpp/open3d/geometry/EstimateNormals.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" @@ -298,22 +299,25 @@ void PointCloud::EstimateNormals( } else { covariances = covariances_; } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)covariances.size(); i++) { - auto normal = ComputeNormal(covariances[i], fast_normal_computation); - if (normal.norm() == 0.0) { - if (has_normal) { - normal = normals_[i]; - } else { - normal = Eigen::Vector3d(0.0, 0.0, 1.0); + + tbb::parallel_for(tbb::blocked_range( + 0, covariances.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + auto normal = ComputeNormal(covariances[i], fast_normal_computation); + if (normal.norm() == 0.0) { + if (has_normal) { + normal = normals_[i]; + } else { + normal = Eigen::Vector3d(0.0, 0.0, 1.0); + } } + if (has_normal && normal.dot(normals_[i]) < 0.0) { + normal *= -1.0; + } + normals_[i] = normal; } - if (has_normal && normal.dot(normals_[i]) < 0.0) { - normal *= -1.0; - } - normals_[i] = normal; - } + }); } void PointCloud::OrientNormalsToAlignWithDirection( @@ -323,16 +327,18 @@ void PointCloud::OrientNormalsToAlignWithDirection( utility::LogError( "No normals in the PointCloud. Call EstimateNormals() first."); } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)points_.size(); i++) { - auto &normal = normals_[i]; - if (normal.norm() == 0.0) { - normal = orientation_reference; - } else if (normal.dot(orientation_reference) < 0.0) { - normal *= -1.0; + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + auto &normal = normals_[i]; + if (normal.norm() == 0.0) { + normal = orientation_reference; + } else if (normal.dot(orientation_reference) < 0.0) { + normal *= -1.0; + } } - } + }); } void PointCloud::OrientNormalsTowardsCameraLocation( @@ -341,22 +347,26 @@ void PointCloud::OrientNormalsTowardsCameraLocation( utility::LogError( "No normals in the PointCloud. Call EstimateNormals() first."); } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)points_.size(); i++) { - Eigen::Vector3d orientation_reference = camera_location - points_[i]; - auto &normal = normals_[i]; - if (normal.norm() == 0.0) { - normal = orientation_reference; + + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + Eigen::Vector3d orientation_reference = + camera_location - points_[i]; + auto &normal = normals_[i]; if (normal.norm() == 0.0) { - normal = Eigen::Vector3d(0.0, 0.0, 1.0); - } else { - normal.normalize(); + normal = orientation_reference; + if (normal.norm() == 0.0) { + normal = Eigen::Vector3d(0.0, 0.0, 1.0); + } else { + normal.normalize(); + } + } else if (normal.dot(orientation_reference) < 0.0) { + normal *= -1.0; } - } else if (normal.dot(orientation_reference) < 0.0) { - normal *= -1.0; } - } + }); } void PointCloud::OrientNormalsConsistentTangentPlane( diff --git a/cpp/open3d/geometry/ISSKeypoints.cpp b/cpp/open3d/geometry/ISSKeypoints.cpp index f47abae1ad2..db4f633fc1a 100644 --- a/cpp/open3d/geometry/ISSKeypoints.cpp +++ b/cpp/open3d/geometry/ISSKeypoints.cpp @@ -18,11 +18,15 @@ #include #include +#include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/Keypoint.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Eigen.h" #include "open3d/utility/Logging.h" +#include "open3d/utility/Parallel.h" namespace open3d { @@ -82,52 +86,59 @@ std::shared_ptr ComputeISSKeypoints( } std::vector third_eigen_values(points.size()); -#pragma omp parallel for schedule(static) shared(third_eigen_values) - for (int i = 0; i < (int)points.size(); i++) { - std::vector indices; - std::vector dist; - int nb_neighbors = - kdtree.SearchRadius(points[i], salient_radius, indices, dist); - if (nb_neighbors < min_neighbors) { - continue; - } + tbb::parallel_for(tbb::blocked_range( + 0, points.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector dist; + int nb_neighbors = + kdtree.SearchRadius(points[i], + salient_radius, indices, dist); + if (nb_neighbors < min_neighbors) { + continue; + } - Eigen::Matrix3d cov = utility::ComputeCovariance(points, indices); - if (cov.isZero()) { - continue; - } + Eigen::Matrix3d cov = utility::ComputeCovariance(points, indices); + if (cov.isZero()) { + continue; + } - Eigen::SelfAdjointEigenSolver solver(cov); - const double& e1c = solver.eigenvalues()[2]; - const double& e2c = solver.eigenvalues()[1]; - const double& e3c = solver.eigenvalues()[0]; + Eigen::SelfAdjointEigenSolver solver(cov); + const double& e1c = solver.eigenvalues()[2]; + const double& e2c = solver.eigenvalues()[1]; + const double& e3c = solver.eigenvalues()[0]; - if ((e2c / e1c) < gamma_21 && e3c / e2c < gamma_32) { - third_eigen_values[i] = e3c; + if ((e2c / e1c) < gamma_21 && e3c / e2c < gamma_32) { + third_eigen_values[i] = e3c; + } } - } + }); - std::vector kp_indices; + tbb::concurrent_vector kp_indices; kp_indices.reserve(points.size()); -#pragma omp parallel for schedule(static) shared(kp_indices) - for (int i = 0; i < (int)points.size(); i++) { - if (third_eigen_values[i] > 0.0) { - std::vector nn_indices; - std::vector dist; - int nb_neighbors = kdtree.SearchRadius(points[i], non_max_radius, - nn_indices, dist); - - if (nb_neighbors >= min_neighbors && - IsLocalMaxima(i, nn_indices, third_eigen_values)) { -#pragma omp critical - kp_indices.emplace_back(i); + tbb::parallel_for(tbb::blocked_range( + 0, points.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + if (third_eigen_values[i] > 0.0) { + std::vector nn_indices; + std::vector dist; + int nb_neighbors = kdtree.SearchRadius( + points[i], non_max_radius, nn_indices, dist); + + if (nb_neighbors >= min_neighbors && + IsLocalMaxima(i, nn_indices, third_eigen_values)) { + kp_indices.emplace_back(i); + } } } - } + }); + utility::LogDebug("[ComputeISSKeypoints] Extracted {} keypoints", kp_indices.size()); - return input.SelectByIndex(kp_indices); + return input.SelectByIndex({kp_indices.begin(), kp_indices.end()}); } } // namespace keypoint diff --git a/cpp/open3d/geometry/Image.cpp b/cpp/open3d/geometry/Image.cpp index f14f709a108..f8d6b265fb5 100644 --- a/cpp/open3d/geometry/Image.cpp +++ b/cpp/open3d/geometry/Image.cpp @@ -7,6 +7,8 @@ #include "open3d/geometry/Image.h" +#include + #include "open3d/utility/Parallel.h" namespace { @@ -133,27 +135,24 @@ std::shared_ptr Image::Downsample() const { if (num_of_channels_ != 1 || bytes_per_channel_ != 4) { utility::LogError("Unsupported image format."); } - int half_width = (int)floor((double)width_ / 2.0); - int half_height = (int)floor((double)height_ / 2.0); - output->Prepare(half_width, half_height, 1, 4); - -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int y = 0; y < output->height_; y++) { - for (int x = 0; x < output->width_; x++) { - float *p1 = PointerAt(x * 2, y * 2); - float *p2 = PointerAt(x * 2 + 1, y * 2); - float *p3 = PointerAt(x * 2, y * 2 + 1); - float *p4 = PointerAt(x * 2 + 1, y * 2 + 1); - float *p = output->PointerAt(x, y); - *p = (*p1 + *p2 + *p3 + *p4) / 4.0f; + // Integer division always rounds towards zero + output->Prepare(width_ / 2, height_ / 2, 1, 4); + + tbb::parallel_for(tbb::blocked_range2d( + 0, output->height_, utility::DefaultGrainSizeTBB(), + 0, output->width_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d& range){ + for (int y = range.rows().begin(); y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); ++x) { + float *p1 = PointerAt(x * 2, y * 2); + float *p2 = PointerAt(x * 2 + 1, y * 2); + float *p3 = PointerAt(x * 2, y * 2 + 1); + float *p4 = PointerAt(x * 2 + 1, y * 2 + 1); + float *p = output->PointerAt(x, y); + *p = (*p1 + *p2 + *p3 + *p4) / 4.0f; + } } - } + }); return output; } @@ -169,28 +168,30 @@ std::shared_ptr Image::FilterHorizontal( output->Prepare(width_, height_, 1, 4); const int half_kernel_size = (int)(floor((double)kernel.size() / 2.0)); - -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int y = 0; y < height_; y++) { - for (int x = 0; x < width_; x++) { - float *po = output->PointerAt(x, y, 0); - double temp = 0; - for (int i = -half_kernel_size; i <= half_kernel_size; i++) { - int x_shift = x + i; - if (x_shift < 0) x_shift = 0; - if (x_shift > width_ - 1) x_shift = width_ - 1; - float *pi = PointerAt(x_shift, y, 0); - temp += (*pi * (float)kernel[i + half_kernel_size]); + // Define the grain size based on the width of the memory read + // Want no more than 1/4 of the read to be the halo cells + const std::size_t multiple = (utility::DefaultGrainSizeTBB() + + 4 * (kernel.size() - 1)) / utility::DefaultGrainSizeTBB(); + const std::size_t grain_size = multiple * utility::DefaultGrainSizeTBB() + + 1 - kernel.size(); + tbb::parallel_for(tbb::blocked_range2d( + 0, height_, grain_size, 0, width_, grain_size), + [&](const tbb::blocked_range2d& range){ + for (int y = range.rows().begin(); y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); ++x) { + float *po = output->PointerAt(x, y, 0); + double temp = 0; + for (int i = -half_kernel_size; i <= half_kernel_size; ++i) { + int x_shift = x + i; + if (x_shift < 0) x_shift = 0; + if (x_shift > width_ - 1) x_shift = width_ - 1; + float *pi = PointerAt(x_shift, y, 0); + temp += (*pi * (float)kernel[i + half_kernel_size]); + } + *po = (float)temp; } - *po = (float)temp; } - } + }); return output; } @@ -255,23 +256,21 @@ std::shared_ptr Image::Transpose() const { int in_bytes_per_line = BytesPerLine(); int bytes_per_pixel = num_of_channels_ * bytes_per_channel_; -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int y = 0; y < height_; y++) { - for (int x = 0; x < width_; x++) { - std::copy( + tbb::parallel_for(tbb::blocked_range2d( + 0, height_, utility::DefaultGrainSizeTBB(), + 0, width_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d& range){ + for (int y = range.rows().begin(); y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); ++x) { + std::copy( data_.data() + y * in_bytes_per_line + x * bytes_per_pixel, data_.data() + y * in_bytes_per_line + (x + 1) * bytes_per_pixel, output->data_.data() + x * out_bytes_per_line + y * bytes_per_pixel); + } } - } + }); return output; } @@ -281,13 +280,15 @@ std::shared_ptr Image::FlipVertical() const { output->Prepare(width_, height_, num_of_channels_, bytes_per_channel_); int bytes_per_line = BytesPerLine(); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int y = 0; y < height_; y++) { - std::copy(data_.data() + y * bytes_per_line, - data_.data() + (y + 1) * bytes_per_line, - output->data_.data() + (height_ - y - 1) * bytes_per_line); - } + tbb::parallel_for(tbb::blocked_range( + 0, height_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (int y = range.begin(); y < range.end(); ++y) { + std::copy(data_.data() + y * bytes_per_line, + data_.data() + (y + 1) * bytes_per_line, + output->data_.data() + (height_ - y - 1) * bytes_per_line); + } + }); return output; } @@ -297,22 +298,23 @@ std::shared_ptr Image::FlipHorizontal() const { int bytes_per_line = BytesPerLine(); int bytes_per_pixel = num_of_channels_ * bytes_per_channel_; -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int y = 0; y < height_; y++) { - for (int x = 0; x < width_; x++) { - std::copy(data_.data() + y * bytes_per_line + x * bytes_per_pixel, - data_.data() + y * bytes_per_line + - (x + 1) * bytes_per_pixel, - output->data_.data() + y * bytes_per_line + - (width_ - x - 1) * bytes_per_pixel); + + tbb::parallel_for(tbb::blocked_range2d( + 0, height_, utility::DefaultGrainSizeTBB(), + 0, width_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d& range){ + for (int y = range.rows().begin(); y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); ++x) { + std::copy( + data_.data() + y * bytes_per_line + x * bytes_per_pixel, + data_.data() + y * bytes_per_line + + (x + 1) * bytes_per_pixel, + output->data_.data() + y * bytes_per_line + + (width_ - x - 1) * bytes_per_pixel); + } } - } + }); + return output; } @@ -324,30 +326,29 @@ std::shared_ptr Image::Dilate(int half_kernel_size /* = 1 */) const { } output->Prepare(width_, height_, 1, 1); -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int y = 0; y < height_; y++) { - for (int x = 0; x < width_; x++) { - for (int yy = -half_kernel_size; yy <= half_kernel_size; yy++) { - for (int xx = -half_kernel_size; xx <= half_kernel_size; xx++) { - unsigned char *pi; - if (TestImageBoundary(x + xx, y + yy)) { - pi = PointerAt(x + xx, y + yy); - if (*pi == 255) { - *output->PointerAt(x, y, 0) = 255; - xx = half_kernel_size; - yy = half_kernel_size; + tbb::parallel_for(tbb::blocked_range2d( + 0, height_, utility::DefaultGrainSizeTBB() - (2 * half_kernel_size), + 0, width_, utility::DefaultGrainSizeTBB() - (2 * half_kernel_size)), + [&](const tbb::blocked_range2d& range){ + for (int y = range.rows().begin(); y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); ++x) { + for (int yy = -half_kernel_size; yy <= half_kernel_size; ++yy) { + for (int xx = -half_kernel_size; xx <= half_kernel_size; ++xx) { + unsigned char *pi; + if (TestImageBoundary(x + xx, y + yy)) { + pi = PointerAt(x + xx, y + yy); + if (*pi == 255) { + *output->PointerAt(x, y, 0) = 255; + xx = half_kernel_size; + yy = half_kernel_size; + } } } } } } - } + }); + return output; } @@ -364,25 +365,24 @@ std::shared_ptr Image::CreateDepthBoundaryMask( auto mask = std::make_shared(); mask->Prepare(width, height, 1, 1); -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int v = 0; v < height; v++) { - for (int u = 0; u < width; u++) { - double dx = *depth_image_gradient_dx->PointerAt(u, v); - double dy = *depth_image_gradient_dy->PointerAt(u, v); - double mag = sqrt(dx * dx + dy * dy); - if (mag > depth_threshold_for_discontinuity_check) { - *mask->PointerAt(u, v) = 255; - } else { - *mask->PointerAt(u, v) = 0; + tbb::parallel_for(tbb::blocked_range2d( + 0, height, utility::DefaultGrainSizeTBB(), + 0, width, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d& range){ + for (int v = range.rows().begin(); v < range.rows().end(); v++) { + for (int u = range.cols().begin(); u < range.cols().end(); u++) { + double dx = *depth_image_gradient_dx->PointerAt(u, v); + double dy = *depth_image_gradient_dy->PointerAt(u, v); + double mag = sqrt(dx * dx + dy * dy); + if (mag > depth_threshold_for_discontinuity_check) { + *mask->PointerAt(u, v) = 255; + } else { + *mask->PointerAt(u, v) = 0; + } } } - } + }); + if (half_dilation_kernel_size_for_discontinuity_map >= 1) { auto mask_dilated = mask->Dilate(half_dilation_kernel_size_for_discontinuity_map); diff --git a/cpp/open3d/geometry/PointCloud.cpp b/cpp/open3d/geometry/PointCloud.cpp index b0b6e0130ae..0f7f4826de0 100644 --- a/cpp/open3d/geometry/PointCloud.cpp +++ b/cpp/open3d/geometry/PointCloud.cpp @@ -11,6 +11,9 @@ #include #include +#include +#include + #include "open3d/geometry/BoundingVolume.h" #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/Qhull.h" @@ -127,20 +130,22 @@ std::vector PointCloud::ComputePointCloudDistance( std::vector distances(points_.size()); KDTreeFlann kdtree; kdtree.SetGeometry(target); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)points_.size(); i++) { - std::vector indices(1); - std::vector dists(1); - if (kdtree.SearchKNN(points_[i], 1, indices, dists) == 0) { - utility::LogDebug( - "[ComputePointCloudToPointCloudDistance] Found a point " - "without neighbors."); - distances[i] = 0.0; - } else { - distances[i] = std::sqrt(dists[0]); + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(1); + std::vector dists(1); + if (kdtree.SearchKNN(points_[i], 1, indices, dists) == 0) { + utility::LogDebug( + "[ComputePointCloudToPointCloudDistance] Found a point " + "without neighbors."); + distances[i] = 0.0; + } else { + distances[i] = std::sqrt(dists[0]); + } } - } + }); return distances; } @@ -498,9 +503,9 @@ std::shared_ptr PointCloud::RandomDownSample( std::vector indices(points_.size()); std::iota(std::begin(indices), std::end(indices), (size_t)0); { - std::lock_guard lock(*utility::random::GetMutex()); + tbb::spin_mutex::scoped_lock lock(utility::random::GetMutex()); std::shuffle(indices.begin(), indices.end(), - *utility::random::GetEngine()); + utility::random::GetEngine()); } indices.resize((int)(sampling_ratio * points_.size())); return SelectByIndex(indices); @@ -574,18 +579,22 @@ PointCloud::RemoveRadiusOutliers(size_t nb_points, KDTreeFlann kdtree; kdtree.SetGeometry(*this); std::vector mask = std::vector(points_.size()); - utility::OMPProgressBar progress_bar( + utility::TBBProgressBar progress_bar( points_.size(), "Remove radius outliers: ", print_progress); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(points_.size()); i++) { - std::vector tmp_indices; - std::vector dist; - size_t nb_neighbors = kdtree.SearchRadius(points_[i], search_radius, - tmp_indices, dist); - mask[i] = (nb_neighbors > nb_points); - ++progress_bar; - } + + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector tmp_indices; + std::vector dist; + size_t nb_neighbors = kdtree.SearchRadius(points_[i], + search_radius, tmp_indices, dist); + mask[i] = (nb_neighbors > nb_points); + ++progress_bar; + } + }); + std::vector indices; for (size_t i = 0; i < mask.size(); i++) { if (mask[i]) { @@ -612,25 +621,33 @@ PointCloud::RemoveStatisticalOutliers(size_t nb_neighbors, kdtree.SetGeometry(*this); std::vector avg_distances = std::vector(points_.size()); std::vector indices; - size_t valid_distances = 0; - utility::OMPProgressBar progress_bar( + + utility::TBBProgressBar progress_bar( points_.size(), "Remove statistical outliers: ", print_progress); -#pragma omp parallel for reduction(+ : valid_distances) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(points_.size()); i++) { - std::vector tmp_indices; - std::vector dist; - kdtree.SearchKNN(points_[i], int(nb_neighbors), tmp_indices, dist); - double mean = -1.0; - if (dist.size() > 0u) { - valid_distances++; - std::for_each(dist.begin(), dist.end(), - [](double &d) { d = std::sqrt(d); }); - mean = std::accumulate(dist.begin(), dist.end(), 0.0) / dist.size(); + size_t valid_distances = tbb::parallel_reduce( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), std::size_t{0}, + [&](const tbb::blocked_range& range, + std::size_t valid_so_far){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector tmp_indices; + std::vector dist; + kdtree.SearchKNN(points_[i], int(nb_neighbors), tmp_indices, dist); + double mean = -1.0; + if (dist.size() > 0u) { + ++valid_so_far; + std::for_each(dist.begin(), dist.end(), + [](double &d) { d = std::sqrt(d); }); + mean = std::accumulate(dist.begin(), dist.end(), + 0.0) / dist.size(); + } + avg_distances[i] = mean; + ++progress_bar; } - avg_distances[i] = mean; - ++progress_bar; - } + return valid_so_far; + }, std::plus{}); + if (valid_distances == 0) { return std::make_tuple(std::make_shared(), std::vector()); @@ -646,7 +663,8 @@ PointCloud::RemoveStatisticalOutliers(size_t nb_neighbors, return x > 0 ? (x - cloud_mean) * (y - cloud_mean) : 0; }); // Bessel's correction - double std_dev = std::sqrt(sq_sum / (valid_distances - 1)); + double std_dev = std::sqrt(sq_sum / + static_cast(valid_distances - 1)); double distance_threshold = cloud_mean + std_ratio * std_dev; for (size_t i = 0; i < avg_distances.size(); i++) { if (avg_distances[i] > 0 && avg_distances[i] < distance_threshold) { @@ -665,21 +683,23 @@ std::vector PointCloud::EstimatePerPointCovariances( KDTreeFlann kdtree; kdtree.SetGeometry(input); -#pragma omp parallel for schedule(static) - for (int i = 0; i < (int)points.size(); i++) { - std::vector indices; - std::vector distance2; - if (kdtree.Search(points[i], search_param, indices, distance2) >= 3) { - auto covariance = utility::ComputeCovariance(points, indices); - if (input.HasCovariances() && covariance.isIdentity(1e-4)) { - covariances[i] = input.covariances_[i]; + tbb::parallel_for(tbb::blocked_range(0, points.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector distance2; + if (kdtree.Search(points[i], search_param, indices, distance2) >= 3) { + auto covariance = utility::ComputeCovariance(points, indices); + if (input.HasCovariances() && covariance.isIdentity(1e-4)) { + covariances[i] = input.covariances_[i]; + } else { + covariances[i] = covariance; + } } else { - covariances[i] = covariance; + covariances[i] = Eigen::Matrix3d::Identity(); } - } else { - covariances[i] = Eigen::Matrix3d::Identity(); } - } + }); return covariances; } void PointCloud::EstimateCovariances( @@ -704,12 +724,14 @@ std::vector PointCloud::ComputeMahalanobisDistance() const { Eigen::Matrix3d covariance; std::tie(mean, covariance) = ComputeMeanAndCovariance(); Eigen::Matrix3d cov_inv = covariance.inverse(); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)points_.size(); i++) { - Eigen::Vector3d p = points_[i] - mean; - mahalanobis[i] = std::sqrt(p.transpose() * cov_inv * p); - } + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + Eigen::Vector3d p = points_[i] - mean; + mahalanobis[i] = std::sqrt(p.transpose() * cov_inv * p); + } + }); return mahalanobis; } @@ -720,20 +742,22 @@ std::vector PointCloud::ComputeNearestNeighborDistance() const { std::vector nn_dis(points_.size()); KDTreeFlann kdtree(*this); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)points_.size(); i++) { - std::vector indices(2); - std::vector dists(2); - if (kdtree.SearchKNN(points_[i], 2, indices, dists) <= 1) { - utility::LogDebug( - "[ComputePointCloudNearestNeighborDistance] Found a point " - "without neighbors."); - nn_dis[i] = 0.0; - } else { - nn_dis[i] = std::sqrt(dists[1]); + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(2); + std::vector dists(2); + if (kdtree.SearchKNN(points_[i], 2, indices, dists) <= 1) { + utility::LogDebug( + "[ComputePointCloudNearestNeighborDistance] " + "Found a point without neighbors."); + nn_dis[i] = 0.0; + } else { + nn_dis[i] = std::sqrt(dists[1]); + } } - } + }); return nn_dis; } diff --git a/cpp/open3d/geometry/PointCloudCluster.cpp b/cpp/open3d/geometry/PointCloudCluster.cpp index 6efad46bd50..5166b9ac244 100644 --- a/cpp/open3d/geometry/PointCloudCluster.cpp +++ b/cpp/open3d/geometry/PointCloudCluster.cpp @@ -8,6 +8,9 @@ #include #include +#include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Logging.h" @@ -27,15 +30,22 @@ std::vector PointCloud::ClusterDBSCAN(double eps, utility::ProgressBar progress_bar(points_.size(), "Precompute neighbors.", print_progress); std::vector> nbs(points_.size()); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int idx = 0; idx < int(points_.size()); ++idx) { - std::vector dists2; - kdtree.SearchRadius(points_[idx], eps, nbs[idx], dists2); - -#pragma omp critical(ClusterDBSCAN) - { ++progress_bar; } - } + + tbb::spin_mutex mtx; + tbb::profiling::set_name(mtx, "ClusterDBSCAN"); + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector dists2; + kdtree.SearchRadius(points_[i], eps, nbs[i], dists2); + { + tbb::spin_mutex::scoped_lock lock(mtx); + ++progress_bar; + } + } + }); + utility::LogDebug("Done Precompute neighbors."); // Set all labels to undefined (-2). diff --git a/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp b/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp index b76bc9f3fc4..ff435589937 100644 --- a/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp +++ b/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp @@ -14,6 +14,8 @@ #include #include +#include + #include "libqhullcpp/PointCoordinates.h" #include "libqhullcpp/Qhull.h" #include "libqhullcpp/QhullVertex.h" @@ -22,6 +24,7 @@ #include "open3d/geometry/KDTreeSearchParam.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Logging.h" +#include "open3d/utility/Parallel.h" namespace open3d { namespace geometry { @@ -993,12 +996,16 @@ PointCloud::DetectPlanarPatches( kdtree.SetGeometry(*this); std::vector> neighbors; neighbors.resize(points_.size()); -#pragma omp parallel for schedule(static) - for (int i = 0; i < static_cast(points_.size()); i++) { - std::vector indices; - std::vector distance2; - kdtree.Search(points_[i], search_param, neighbors[i], distance2); - } + + tbb::parallel_for(tbb::blocked_range( + 0, points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector distance2; + kdtree.Search(points_[i], search_param, neighbors[i], distance2); + } + }); const double normal_similarity_rad = normal_similarity_deg * M_PI / 180.0; const double coplanarity_rad = coplanarity_deg * M_PI / 180.0; diff --git a/cpp/open3d/geometry/PointCloudSegmentation.cpp b/cpp/open3d/geometry/PointCloudSegmentation.cpp index 2968810cecc..e4da2a382b4 100644 --- a/cpp/open3d/geometry/PointCloudSegmentation.cpp +++ b/cpp/open3d/geometry/PointCloudSegmentation.cpp @@ -12,6 +12,9 @@ #include #include +#include +#include + #include "open3d/geometry/PointCloud.h" #include "open3d/geometry/TriangleMesh.h" #include "open3d/utility/Logging.h" @@ -60,6 +63,11 @@ class RANSACResult { RANSACResult() : fitness_(0), inlier_rmse_(0) {} ~RANSACResult() {} + bool IsBetterRANSACThan(const RANSACResult& other) { + return fitness_ > other.fitness_ || + (fitness_ == other.fitness_ && inlier_rmse_ < other.inlier_rmse_); + } + public: double fitness_; double inlier_rmse_; @@ -163,12 +171,6 @@ std::tuple> PointCloud::SegmentPlane( size_t num_points = points_.size(); RandomSampler sampler(num_points); - // Pre-generate all random samples before entering the parallel region - std::vector> all_sampled_indices; - all_sampled_indices.reserve(num_iterations); - for (int i = 0; i < num_iterations; i++) { - all_sampled_indices.push_back(sampler(ransac_n)); - } // Return if ransac_n is less than the required plane model parameters. if (ransac_n < 3) { @@ -183,57 +185,63 @@ std::tuple> PointCloud::SegmentPlane( std::vector{}); } - // Use size_t here to avoid large integer which acceed max of int. - size_t break_iteration = std::numeric_limits::max(); - int iteration_count = 0; - -#pragma omp parallel for schedule(static) - for (int itr = 0; itr < num_iterations; itr++) { - if ((size_t)iteration_count > break_iteration) { - continue; - } + // Use size_t here to avoid large integer which exceed max of int. + std::size_t break_iteration = std::numeric_limits::max(); + std::size_t iteration_count = 0; + + tbb::spin_rw_mutex mtx; + tbb::parallel_for(tbb::blocked_range(0, num_iterations, 1), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + // Eschew using a mutex here because doing an extra iteration is ok + if (iteration_count > break_iteration) { + continue; + } - // Access the pre-generated sampled indices - std::vector inliers = all_sampled_indices[itr]; - - // Fit model to num_model_parameters randomly selected points among the - // inliers. - Eigen::Vector4d plane_model; - if (ransac_n == 3) { - plane_model = TriangleMesh::ComputeTrianglePlane( - points_[inliers[0]], points_[inliers[1]], - points_[inliers[2]]); - } else { - plane_model = GetPlaneFromPoints(points_, inliers); - } + const std::vector sampled_indices = sampler(ransac_n); + std::vector inliers = sampled_indices; + + // Fit model to num_model_parameters randomly selected points + // among the inliers. + Eigen::Vector4d plane_model; + if (ransac_n == 3) { + plane_model = TriangleMesh::ComputeTrianglePlane( + points_[inliers[0]], points_[inliers[1]], + points_[inliers[2]]); + } else { + plane_model = GetPlaneFromPoints(points_, inliers); + } - if (plane_model.isZero(0)) { - continue; - } + if (plane_model.isZero(0)) { + continue; + } - inliers.clear(); - auto this_result = EvaluateRANSACBasedOnDistance( - points_, plane_model, inliers, distance_threshold); -#pragma omp critical - { - if (this_result.fitness_ > result.fitness_ || - (this_result.fitness_ == result.fitness_ && - this_result.inlier_rmse_ < result.inlier_rmse_)) { - result = this_result; - best_plane_model = plane_model; - if (result.fitness_ < 1.0) { - break_iteration = std::min( - log(1 - probability) / + inliers.clear(); + auto this_result = EvaluateRANSACBasedOnDistance( + points_, plane_model, inliers, distance_threshold); + { + tbb::spin_rw_mutex::scoped_lock lock(mtx); + if (this_result.IsBetterRANSACThan(result)) { + lock.upgrade_to_writer(); + // Have to recheck after upgrading lock because + // upgrading requires releasing the lock + if (this_result.IsBetterRANSACThan(result)) { + result = this_result; + best_plane_model = plane_model; + if (result.fitness_ < 1.0) { + break_iteration = std::min(log(1 - probability) / log(1 - pow(result.fitness_, ransac_n)), - (double)num_iterations); - } else { - // Set break_iteration to 0 to force to break the loop. - break_iteration = 0; + (double)num_iterations); + } else { + // Set break_iteration to 0 so we break the loop. + break_iteration = 0; + } + } } + iteration_count++; } - iteration_count++; } - } + }); // Find the final inliers using best_plane_model. std::vector final_inliers; diff --git a/cpp/open3d/geometry/TriangleMesh.cpp b/cpp/open3d/geometry/TriangleMesh.cpp index 9e909858736..f77c1965001 100644 --- a/cpp/open3d/geometry/TriangleMesh.cpp +++ b/cpp/open3d/geometry/TriangleMesh.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "open3d/geometry/BoundingVolume.h" #include "open3d/geometry/IntersectionTest.h" @@ -927,13 +928,15 @@ TriangleMesh &TriangleMesh::MergeCloseVertices(double eps) { // precompute all neighbours utility::LogDebug("Precompute Neighbours"); std::vector> nbs(vertices_.size()); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int idx = 0; idx < int(vertices_.size()); ++idx) { - std::vector dists2; - kdtree.SearchRadius(vertices_[idx], eps, nbs[idx], dists2); - } utility::LogDebug("Done Precompute Neighbours"); + tbb::parallel_for(tbb::blocked_range( + 0, vertices_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + std::vector dists2; + for (std::size_t idx = range.begin(); idx < range.end(); ++idx) { + kdtree.SearchRadius(vertices_[idx], eps, nbs[idx], dists2); + } + }); bool has_vertex_normals = HasVertexNormals(); bool has_vertex_colors = HasVertexColors(); @@ -973,12 +976,12 @@ TriangleMesh &TriangleMesh::MergeCloseVertices(double eps) { new_vert_mapping[nb] = new_vidx; n += 1; } - new_vertices.push_back(vertex / n); + new_vertices.emplace_back(vertex / n); if (has_vertex_normals) { - new_vertex_normals.push_back(normal / n); + new_vertex_normals.emplace_back(normal / n); } if (has_vertex_colors) { - new_vertex_colors.push_back(color / n); + new_vertex_colors.emplace_back(color / n); } } utility::LogDebug("Merged {} vertices", @@ -1198,18 +1201,6 @@ double TriangleMesh::GetSurfaceArea(std::vector &triangle_areas) const { } double TriangleMesh::GetVolume() const { - // Computes the signed volume of the tetrahedron defined by - // the three triangle vertices and the origin. The sign is determined by - // checking if the origin is at the same side as the normal with respect to - // the triangle. - auto GetSignedVolumeOfTriangle = [&](size_t tidx) { - const Eigen::Vector3i &triangle = triangles_[tidx]; - const Eigen::Vector3d &vertex0 = vertices_[triangle(0)]; - const Eigen::Vector3d &vertex1 = vertices_[triangle(1)]; - const Eigen::Vector3d &vertex2 = vertices_[triangle(2)]; - return vertex0.dot(vertex1.cross(vertex2)) / 6.0; - }; - if (!IsWatertight()) { utility::LogError( "The mesh is not watertight, and the volume cannot be " @@ -1221,13 +1212,22 @@ double TriangleMesh::GetVolume() const { "computed."); } - double volume = 0; - int64_t num_triangles = triangles_.size(); -#pragma omp parallel for reduction(+ : volume) num_threads(utility::EstimateMaxThreads()) - for (int64_t tidx = 0; tidx < num_triangles; ++tidx) { - volume += GetSignedVolumeOfTriangle(tidx); - } - return std::abs(volume); + return std::abs(tbb::parallel_reduce(tbb::blocked_range( + 0, triangles_.size(), utility::DefaultGrainSizeTBB()), 0.0, + [&](const tbb::blocked_range& range, double volume){ + for (std::size_t tidx = range.begin(); tidx < range.end(); ++tidx) { + // Computes the signed volume of the tetrahedron defined by + // the three triangle vertices and the origin. The sign is determined by + // checking if the origin is at the same side as the normal with respect to + // the triangle. + const Eigen::Vector3i &triangle = triangles_[tidx]; + const Eigen::Vector3d &vertex0 = vertices_[triangle(0)]; + const Eigen::Vector3d &vertex1 = vertices_[triangle(1)]; + const Eigen::Vector3d &vertex2 = vertices_[triangle(2)]; + volume += vertex0.dot(vertex1.cross(vertex2)) / 6.0; + } + return volume; + }, std::plus{})); } Eigen::Vector4d TriangleMesh::ComputeTrianglePlane(const Eigen::Vector3d &p0, @@ -1359,45 +1359,50 @@ bool TriangleMesh::IsVertexManifold() const { std::vector TriangleMesh::GetSelfIntersectingTriangles() const { std::vector self_intersecting_triangles; - for (size_t tidx0 = 0; tidx0 < triangles_.size() - 1; ++tidx0) { - const Eigen::Vector3i &tria_p = triangles_[tidx0]; - const Eigen::Vector3d &p0 = vertices_[tria_p(0)]; - const Eigen::Vector3d &p1 = vertices_[tria_p(1)]; - const Eigen::Vector3d &p2 = vertices_[tria_p(2)]; - - const Eigen::Vector3d bb_min1 = - p0.array().min(p1.array().min(p2.array())); - const Eigen::Vector3d bb_max1 = - p0.array().max(p1.array().max(p2.array())); - - for (size_t tidx1 = tidx0 + 1; tidx1 < triangles_.size(); ++tidx1) { - const Eigen::Vector3i &tria_q = triangles_[tidx1]; - // check if neighbour triangle - if (tria_p(0) == tria_q(0) || tria_p(0) == tria_q(1) || - tria_p(0) == tria_q(2) || tria_p(1) == tria_q(0) || - tria_p(1) == tria_q(1) || tria_p(1) == tria_q(2) || - tria_p(2) == tria_q(0) || tria_p(2) == tria_q(1) || - tria_p(2) == tria_q(2)) { - continue; - } + tbb::spin_mutex out_mtx; + tbb::parallel_for(tbb::blocked_range( + 0, triangles_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t tidx0 = range.begin(); tidx0 < range.end(); ++tidx0) { + const Eigen::Vector3i &tria_p = triangles_[tidx0]; + const Eigen::Vector3d &p0 = vertices_[tria_p(0)]; + const Eigen::Vector3d &p1 = vertices_[tria_p(1)]; + const Eigen::Vector3d &p2 = vertices_[tria_p(2)]; + + const Eigen::Vector3d bb_min1 = + p0.array().min(p1.array().min(p2.array())); + const Eigen::Vector3d bb_max1 = + p0.array().max(p1.array().max(p2.array())); + + for (size_t tidx1 = tidx0 + 1; tidx1 < triangles_.size(); ++tidx1) { + const Eigen::Vector3i &tria_q = triangles_[tidx1]; + // check if neighbour triangle + if (tria_p(0) == tria_q(0) || tria_p(0) == tria_q(1) || + tria_p(0) == tria_q(2) || tria_p(1) == tria_q(0) || + tria_p(1) == tria_q(1) || tria_p(1) == tria_q(2) || + tria_p(2) == tria_q(0) || tria_p(2) == tria_q(1) || + tria_p(2) == tria_q(2)) { + continue; + } - // check for intersection - const Eigen::Vector3d &q0 = vertices_[tria_q(0)]; - const Eigen::Vector3d &q1 = vertices_[tria_q(1)]; - const Eigen::Vector3d &q2 = vertices_[tria_q(2)]; - - const Eigen::Vector3d bb_min2 = - q0.array().min(q1.array().min(q2.array())); - const Eigen::Vector3d bb_max2 = - q0.array().max(q1.array().max(q2.array())); - if (IntersectionTest::AABBAABB(bb_min1, bb_max1, bb_min2, - bb_max2) && - IntersectionTest::TriangleTriangle3d(p0, p1, p2, q0, q1, q2)) { - self_intersecting_triangles.push_back( - Eigen::Vector2i(tidx0, tidx1)); + // check for intersection + const Eigen::Vector3d &q0 = vertices_[tria_q(0)]; + const Eigen::Vector3d &q1 = vertices_[tria_q(1)]; + const Eigen::Vector3d &q2 = vertices_[tria_q(2)]; + + const Eigen::Vector3d bb_min2 = + q0.array().min(q1.array().min(q2.array())); + const Eigen::Vector3d bb_max2 = + q0.array().max(q1.array().max(q2.array())); + if (IntersectionTest::AABBAABB(bb_min1, bb_max1, bb_min2, bb_max2) && + IntersectionTest::TriangleTriangle3d(p0, p1, p2, q0, q1, q2)) { + tbb::spin_mutex::scoped_lock lock(out_mtx); + self_intersecting_triangles.push_back( + Eigen::Vector2i(tidx0, tidx1)); + } } } - } + }); return self_intersecting_triangles; } @@ -1441,23 +1446,25 @@ TriangleMesh::ClusterConnectedTriangles() const { utility::LogDebug("[ClusterConnectedTriangles] Compute triangle adjacency"); auto edges_to_triangles = GetEdgeToTrianglesMap(); std::vector> adjacency_list(triangles_.size()); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int tidx = 0; tidx < int(triangles_.size()); ++tidx) { - const auto &triangle = triangles_[tidx]; - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(0), triangle(1))]) { - adjacency_list[tidx].insert(tnb); - } - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(0), triangle(2))]) { - adjacency_list[tidx].insert(tnb); - } - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(1), triangle(2))]) { - adjacency_list[tidx].insert(tnb); + tbb::parallel_for(tbb::blocked_range( + 0, triangles_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t tidx = range.begin(); tidx < range.end(); ++tidx) { + const auto &triangle = triangles_[tidx]; + for (auto tnb : + edges_to_triangles[GetOrderedEdge(triangle(0), triangle(1))]) { + adjacency_list[tidx].insert(tnb); + } + for (auto tnb : + edges_to_triangles[GetOrderedEdge(triangle(0), triangle(2))]) { + adjacency_list[tidx].insert(tnb); + } + for (auto tnb : + edges_to_triangles[GetOrderedEdge(triangle(1), triangle(2))]) { + adjacency_list[tidx].insert(tnb); + } } - } + }); utility::LogDebug( "[ClusterConnectedTriangles] Done computing triangle adjacency"); diff --git a/cpp/open3d/geometry/TriangleMeshDeformation.cpp b/cpp/open3d/geometry/TriangleMeshDeformation.cpp index adfe76b76ed..4541c0fd7c4 100644 --- a/cpp/open3d/geometry/TriangleMeshDeformation.cpp +++ b/cpp/open3d/geometry/TriangleMeshDeformation.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include "open3d/geometry/TriangleMesh.h" #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" @@ -93,72 +95,77 @@ std::shared_ptr TriangleMesh::DeformAsRigidAsPossible( std::swap(Rs, Rs_old); } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(vertices_.size()); ++i) { - // Update rotations - Eigen::Matrix3d S = Eigen::Matrix3d::Zero(); - Eigen::Matrix3d R = Eigen::Matrix3d::Zero(); - int n_nbs = 0; - for (int j : prime->adjacency_list_[i]) { - Eigen::Vector3d e0 = vertices_[i] - vertices_[j]; - Eigen::Vector3d e1 = prime->vertices_[i] - prime->vertices_[j]; - double w = edge_weights[GetOrderedEdge(i, j)]; - S += w * (e0 * e1.transpose()); - if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed) { - R += Rs_old[j]; - } - n_nbs++; - } - if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed && - iter > 0 && n_nbs > 0) { - S = 2 * S + - (4 * smoothed_alpha * surface_area / n_nbs) * R.transpose(); - } - Eigen::JacobiSVD svd( - S, Eigen::ComputeFullU | Eigen::ComputeFullV); - Eigen::Matrix3d U = svd.matrixU(); - Eigen::Matrix3d V = svd.matrixV(); - Eigen::Vector3d D(1, 1, (V * U.transpose()).determinant()); - // ensure rotation: - // http://graphics.stanford.edu/~smr/ICP/comparison/eggert_comparison_mva97.pdf - Rs[i] = V * D.asDiagonal() * U.transpose(); - if (Rs[i].determinant() <= 0) { - utility::LogError( - "something went wrong with " - "updating R"); - } - } - -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(vertices_.size()); ++i) { - // Update Positions - Eigen::Vector3d bi(0, 0, 0); - if (constraints.count(i) > 0) { - bi = constraints[i]; - } else { + tbb::parallel_for(tbb::blocked_range( + 0, vertices_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (int i = range.begin(); i < range.end(); ++i) { + // Update rotations + Eigen::Matrix3d S = Eigen::Matrix3d::Zero(); + Eigen::Matrix3d R = Eigen::Matrix3d::Zero(); + int n_nbs = 0; for (int j : prime->adjacency_list_[i]) { + Eigen::Vector3d e0 = vertices_[i] - vertices_[j]; + Eigen::Vector3d e1 = prime->vertices_[i] - prime->vertices_[j]; double w = edge_weights[GetOrderedEdge(i, j)]; - bi += w / 2 * - ((Rs[i] + Rs[j]) * (vertices_[i] - vertices_[j])); + S += w * (e0 * e1.transpose()); + if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed) { + R += Rs_old[j]; + } + n_nbs++; + } + if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed && + iter > 0 && n_nbs > 0) { + S = 2 * S + (4 * smoothed_alpha * surface_area / n_nbs) + * R.transpose(); + } + Eigen::JacobiSVD svd( + S, Eigen::ComputeFullU | Eigen::ComputeFullV); + Eigen::Matrix3d U = svd.matrixU(); + Eigen::Matrix3d V = svd.matrixV(); + Eigen::Vector3d D(1, 1, (V * U.transpose()).determinant()); + // ensure rotation: + // http://graphics.stanford.edu/~smr/ICP/comparison/eggert_comparison_mva97.pdf + Rs[i] = V * D.asDiagonal() * U.transpose(); + if (Rs[i].determinant() <= 0) { + utility::LogError("something went wrong with updating R"); } } - b[0](i) = bi(0); - b[1](i) = bi(1); - b[2](i) = bi(2); - } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int comp = 0; comp < 3; ++comp) { - Eigen::VectorXd p_prime = solver.solve(b[comp]); - if (solver.info() != Eigen::Success) { - utility::LogError("Cholesky solve failed"); + }); + + tbb::parallel_for(tbb::blocked_range( + 0, vertices_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (int i = range.begin(); i < range.end(); ++i) { + // Update Positions + Eigen::Vector3d bi(0, 0, 0); + if (constraints.count(i) > 0) { + bi = constraints[i]; + } else { + for (int j : prime->adjacency_list_[i]) { + double w = edge_weights[GetOrderedEdge(i, j)]; + bi += w / 2 * + ((Rs[i] + Rs[j]) * (vertices_[i] - vertices_[j])); + } + } + b[0](i) = bi(0); + b[1](i) = bi(1); + b[2](i) = bi(2); } - for (int i = 0; i < int(vertices_.size()); ++i) { - prime->vertices_[i](comp) = p_prime(i); + }); + + tbb::parallel_for(tbb::blocked_range(0, 3, 1), + [&](const tbb::blocked_range& range){ + for (int comp = range.begin(); comp < range.end(); ++comp) { + Eigen::VectorXd p_prime = solver.solve(b[comp]); + if (solver.info() != Eigen::Success) { + utility::LogError("Cholesky solve failed"); + } + for (int i = 0; i < int(vertices_.size()); ++i) { + prime->vertices_[i](comp) = p_prime(i); + } } - } + }); + // Compute energy and log double energy = 0; diff --git a/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp b/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp index 962113b2df3..e137fa57933 100644 --- a/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp +++ b/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp @@ -11,6 +11,9 @@ #include #include +#include +#include + #include #include "open3d/geometry/RGBDImage.h" @@ -160,20 +163,19 @@ void ConvertBGRAToRGB(geometry::Image &bgra, geometry::Image &rgb) { "dimensions."); } -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(3) schedule(static) -#endif - for (int v = 0; v < bgra.height_; ++v) { - for (int u = 0; u < bgra.width_; ++u) { - for (int c = 0; c < 3; ++c) { - *rgb.PointerAt(u, v, c) = - *bgra.PointerAt(u, v, 2 - c); + tbb::parallel_for(tbb::blocked_range2d( + 0, bgra.height_, utility::DefaultGrainSizeTBB2D(), + 0, bgra.width_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range2d& range){ + for (int v = range.rows().begin(); v < range.rows().end(); ++v) { + for (int u = range.cols().begin(); u < range.cols().end(); ++u) { + for (int c = 0; c < 3; ++c) { + *rgb.PointerAt(u, v, c) = + *bgra.PointerAt(u, v, 2 - c); + } } } - } + }); } bool AzureKinectSensor::PrintFirmware(k4a_device_t device) { diff --git a/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp b/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp index 8ab677d3a3a..10539c4a12b 100644 --- a/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp +++ b/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp @@ -7,6 +7,10 @@ #include "open3d/pipelines/color_map/ColorMapUtils.h" +#include +#include +#include + #include "open3d/camera/PinholeCameraTrajectory.h" #include "open3d/geometry/Image.h" #include "open3d/geometry/KDTreeFlann.h" @@ -123,50 +127,56 @@ CreateVertexAndImageVisibility( const camera::PinholeCameraTrajectory& camera_trajectory, double maximum_allowable_depth, double depth_threshold_for_visibility_check) { - size_t n_camera = camera_trajectory.parameters_.size(); - size_t n_vertex = mesh.vertices_.size(); + int n_camera = static_cast(camera_trajectory.parameters_.size()); + int n_vertex = static_cast(mesh.vertices_.size()); // visibility_image_to_vertex[c]: vertices visible by camera c. std::vector> visibility_image_to_vertex; visibility_image_to_vertex.resize(n_camera); + + tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + [&](const tbb::blocked_range& range) { + for (int camera_id = range.begin(); + camera_id < range.end(); ++camera_id) { + for (int vertex_id = 0; vertex_id < n_vertex; vertex_id++) { + Eigen::Vector3d X = mesh.vertices_[vertex_id]; + float u, v, d; + std::tie(u, v, d) = Project3DPointAndGetUVDepth( + X, camera_trajectory.parameters_[camera_id]); + int u_d = int(round(u)); + int v_d = int(round(v)); + // Skip if vertex in image boundary. + if (d < 0.0 || + !images_depth[camera_id].TestImageBoundary(u_d, v_d)) { + continue; + } + // Skip if vertex's depth is too large (e.g. background). + float d_sensor = + *images_depth[camera_id].PointerAt(u_d, v_d); + if (d_sensor > maximum_allowable_depth) { + continue; + } + // Check depth boundary mask. If a vertex is located at the boundary + // of an object, its color will be highly diverse from different + // viewing angles. + if (*images_mask[camera_id].PointerAt(u_d, v_d) == 255) { + continue; + } + // Check depth errors. + if (std::fabs(d - d_sensor) >= + depth_threshold_for_visibility_check) { + continue; + } + visibility_image_to_vertex[camera_id].push_back(vertex_id); + } + } + }); + // visibility_vertex_to_image[v]: cameras that can see vertex v. std::vector> visibility_vertex_to_image; visibility_vertex_to_image.resize(n_vertex); - -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int camera_id = 0; camera_id < int(n_camera); camera_id++) { - for (int vertex_id = 0; vertex_id < int(n_vertex); vertex_id++) { - Eigen::Vector3d X = mesh.vertices_[vertex_id]; - float u, v, d; - std::tie(u, v, d) = Project3DPointAndGetUVDepth( - X, camera_trajectory.parameters_[camera_id]); - int u_d = int(round(u)); - int v_d = int(round(v)); - // Skip if vertex in image boundary. - if (d < 0.0 || - !images_depth[camera_id].TestImageBoundary(u_d, v_d)) { - continue; - } - // Skip if vertex's depth is too large (e.g. background). - float d_sensor = - *images_depth[camera_id].PointerAt(u_d, v_d); - if (d_sensor > maximum_allowable_depth) { - continue; - } - // Check depth boundary mask. If a vertex is located at the boundary - // of an object, its color will be highly diverse from different - // viewing angles. - if (*images_mask[camera_id].PointerAt(u_d, v_d) == 255) { - continue; - } - // Check depth errors. - if (std::fabs(d - d_sensor) >= - depth_threshold_for_visibility_check) { - continue; - } - visibility_image_to_vertex[camera_id].push_back(vertex_id); -#pragma omp critical(CreateVertexAndImageVisibility) - { visibility_vertex_to_image[vertex_id].push_back(camera_id); } + for (int i = 0; i < n_camera; ++i) { + for (int vidx: visibility_image_to_vertex[i]) { + visibility_vertex_to_image[vidx].emplace_back(i); } } @@ -193,37 +203,39 @@ void SetProxyIntensityForVertex( auto n_vertex = mesh.vertices_.size(); proxy_intensity.resize(n_vertex); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < int(n_vertex); i++) { - proxy_intensity[i] = 0.0; - float sum = 0.0; - for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); - iter++) { - int j = visibility_vertex_to_image[i][iter]; - float gray; - bool valid = false; - if (warping_fields.has_value()) { - std::tie(valid, gray) = QueryImageIntensity( - images_gray[j], warping_fields.value()[j], - mesh.vertices_[i], camera_trajectory.parameters_[j], - utility::nullopt, image_boundary_margin); - } else { - std::tie(valid, gray) = QueryImageIntensity( - images_gray[j], utility::nullopt, mesh.vertices_[i], - camera_trajectory.parameters_[j], utility::nullopt, - image_boundary_margin); - } + tbb::parallel_for(tbb::blocked_range( + 0, n_vertex, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + proxy_intensity[i] = 0.0; + float sum = 0.0; + for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); + iter++) { + int j = visibility_vertex_to_image[i][iter]; + float gray; + bool valid = false; + if (warping_fields.has_value()) { + std::tie(valid, gray) = QueryImageIntensity( + images_gray[j], warping_fields.value()[j], + mesh.vertices_[i], camera_trajectory.parameters_[j], + utility::nullopt, image_boundary_margin); + } else { + std::tie(valid, gray) = QueryImageIntensity( + images_gray[j], utility::nullopt, mesh.vertices_[i], + camera_trajectory.parameters_[j], utility::nullopt, + image_boundary_margin); + } - if (valid) { - sum += 1.0; - proxy_intensity[i] += gray; + if (valid) { + sum += 1.0; + proxy_intensity[i] += gray; + } + } + if (sum > 0) { + proxy_intensity[i] /= sum; } } - if (sum > 0) { - proxy_intensity[i] /= sum; - } - } + }); } void SetGeometryColorAverage( @@ -237,43 +249,46 @@ void SetGeometryColorAverage( size_t n_vertex = mesh.vertices_.size(); mesh.vertex_colors_.clear(); mesh.vertex_colors_.resize(n_vertex); - std::vector valid_vertices; - std::vector invalid_vertices; -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)n_vertex; i++) { - mesh.vertex_colors_[i] = Eigen::Vector3d::Zero(); - double sum = 0.0; - for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); - iter++) { - int j = visibility_vertex_to_image[i][iter]; - uint8_t r_temp, g_temp, b_temp; - bool valid = false; - utility::optional optional_warping_field; - if (warping_fields.has_value()) { - optional_warping_field = warping_fields.value()[j]; - } else { - optional_warping_field = utility::nullopt; - } - std::tie(valid, r_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, mesh.vertices_[i], - camera_trajectory.parameters_[j], 0, image_boundary_margin); - std::tie(valid, g_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, mesh.vertices_[i], - camera_trajectory.parameters_[j], 1, image_boundary_margin); - std::tie(valid, b_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, mesh.vertices_[i], - camera_trajectory.parameters_[j], 2, image_boundary_margin); - float r = (float)r_temp / 255.0f; - float g = (float)g_temp / 255.0f; - float b = (float)b_temp / 255.0f; - if (valid) { - mesh.vertex_colors_[i] += Eigen::Vector3d(r, g, b); - sum += 1.0; + tbb::concurrent_vector valid_vertices; + tbb::concurrent_vector invalid_vertices; + + tbb::parallel_for(tbb::blocked_range( + 0, n_vertex, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + mesh.vertex_colors_[i] = Eigen::Vector3d::Zero(); + double sum = 0.0; + for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); + iter++) { + int j = visibility_vertex_to_image[i][iter]; + uint8_t r_temp, g_temp, b_temp; + bool valid = false; + utility::optional optional_warping_field; + if (warping_fields.has_value()) { + optional_warping_field = warping_fields.value()[j]; + } else { + optional_warping_field = utility::nullopt; + } + std::tie(valid, r_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i],camera_trajectory.parameters_[j], + 0, image_boundary_margin); + std::tie(valid, g_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i], camera_trajectory.parameters_[j], + 1, image_boundary_margin); + std::tie(valid, b_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i], camera_trajectory.parameters_[j], + 2, image_boundary_margin); + float r = (float)r_temp / 255.0f; + float g = (float)g_temp / 255.0f; + float b = (float)b_temp / 255.0f; + if (valid) { + mesh.vertex_colors_[i] += Eigen::Vector3d(r, g, b); + sum += 1.0; + } } - } -#pragma omp critical(SetGeometryColorAverage) - { if (sum > 0.0) { mesh.vertex_colors_[i] /= sum; valid_vertices.push_back(i); @@ -281,15 +296,13 @@ void SetGeometryColorAverage( invalid_vertices.push_back(i); } } - } + }); if (invisible_vertex_color_knn > 0) { - std::shared_ptr valid_mesh = - mesh.SelectByIndex(valid_vertices); + std::shared_ptr valid_mesh = mesh.SelectByIndex( + {valid_vertices.begin(), valid_vertices.end()}); geometry::KDTreeFlann kd_tree(*valid_mesh); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)invalid_vertices.size(); ++i) { - size_t invalid_vertex = invalid_vertices[i]; + tbb::parallel_for_each(invalid_vertices, + [&](std::size_t invalid_vertex){ std::vector indices; // indices to valid_mesh std::vector dists; kd_tree.SearchKNN(mesh.vertices_[invalid_vertex], @@ -298,11 +311,11 @@ void SetGeometryColorAverage( for (const int& index : indices) { new_color += valid_mesh->vertex_colors_[index]; } - if (indices.size() > 0) { + if (indices.empty()) { new_color /= static_cast(indices.size()); } mesh.vertex_colors_[invalid_vertex] = new_color; - } + }); } } diff --git a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp index 9dfc98fdfa8..d93cc16f4b4 100644 --- a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp @@ -10,6 +10,10 @@ #include #include +#include +#include +#include + #include "open3d/io/ImageIO.h" #include "open3d/io/ImageWarpingFieldIO.h" #include "open3d/io/PinholeCameraTrajectoryIO.h" @@ -43,6 +47,55 @@ static std::vector CreateWarpingFields( return fields; } +template +struct JTJandJTrNonRigidReducer { + // Global Data + using FuncType = std::function; + FuncType& f; + // Local Data + MatOutType JTJ; + VecOutType JTr; + double r2_sum = 0.0; + + JTJandJTrNonRigidReducer(FuncType& f_, int nonrigidval) + : f(f_), JTJ(MatOutType::Zero(6 + nonrigidval, 6 + nonrigidval)), + JTr(VecOutType::Zero(6 + nonrigidval)) {}; + + JTJandJTrNonRigidReducer(JTJandJTrNonRigidReducer& o, tbb::split) + : f(o.f), JTJ(MatOutType::Zero(o.JTJ.rows(), o.JTJ.cols())), + JTr(VecOutType::Zero(o.JTr.rows(), o.JTr.cols())) {} + + void operator()(const tbb::blocked_range& range) { + VecInTypeDouble J_r; + VecInTypeInt pattern; + double r; + for (int i = range.begin(); i < range.end(); i++) { + f(i, J_r, r, pattern); + for (auto x = 0; x < J_r.size(); x++) { + for (auto y = 0; y < J_r.size(); y++) { + JTJ(pattern(x), pattern(y)) += J_r(x) * J_r(y); + } + } + for (auto x = 0; x < J_r.size(); x++) { + JTr(pattern(x)) += r * J_r(x); + } + r2_sum += r * r; + } + } + + void join(const JTJandJTrNonRigidReducer& rhs) { + JTJ += rhs.JTJ; + JTr += rhs.JTr; + r2_sum += rhs.r2_sum; + } + + std::tuple as_tuple() && { + return {std::move(JTJ), std::move(JTr), r2_sum}; + } +}; + /// Function to compute JTJ and Jtr /// Input: function pointer f and total number of rows of Jacobian matrix /// Output: JTJ, JTr, sum of r^2 @@ -58,46 +111,16 @@ static std::tuple ComputeJTJandJTrNonRigid( int iteration_num, int nonrigidval, bool verbose /*=true*/) { - MatOutType JTJ(6 + nonrigidval, 6 + nonrigidval); - VecOutType JTr(6 + nonrigidval); - double r2_sum = 0.0; - JTJ.setZero(); - JTr.setZero(); -#pragma omp parallel - { - MatOutType JTJ_private(6 + nonrigidval, 6 + nonrigidval); - VecOutType JTr_private(6 + nonrigidval); - double r2_sum_private = 0.0; - JTJ_private.setZero(); - JTr_private.setZero(); - VecInTypeDouble J_r; - VecInTypeInt pattern; - double r; -#pragma omp for nowait - for (int i = 0; i < iteration_num; i++) { - f(i, J_r, r, pattern); - for (auto x = 0; x < J_r.size(); x++) { - for (auto y = 0; y < J_r.size(); y++) { - JTJ_private(pattern(x), pattern(y)) += J_r(x) * J_r(y); - } - } - for (auto x = 0; x < J_r.size(); x++) { - JTr_private(pattern(x)) += r * J_r(x); - } - r2_sum_private += r * r; - } -#pragma omp critical(ComputeJTJandJTrNonRigid) - { - JTJ += JTJ_private; - JTr += JTr_private; - r2_sum += r2_sum_private; - } - } + JTJandJTrNonRigidReducer reducer(f, nonrigidval); + tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), reducer); + auto out = std::move(reducer).as_tuple(); if (verbose) { utility::LogDebug("Residual : {:.2e} (# of elements : {:d})", - r2_sum / (double)iteration_num, iteration_num); + std::get<2>(out) / (double)iteration_num, iteration_num); } - return std::make_tuple(std::move(JTJ), std::move(JTr), r2_sum); + return out; } static void ComputeJacobianAndResidualNonRigid( @@ -200,6 +223,15 @@ static void ComputeJacobianAndResidualNonRigid( r = (gray - proxy_intensity[vid]); } +inline void atomic_sum(std::atomic& total, const double& val) { +#if defined(__cpp_lib_atomic_float) && __cpp_lib_atomic >= 201711L + total.fetch_add(val); +#else + double expected = total; + while(!total.compare_exchange_weak(expected, expected + val)); +#endif +} + std::pair RunNonRigidOptimizer(const geometry::TriangleMesh& mesh, const std::vector& images_rgbd, @@ -282,75 +314,74 @@ RunNonRigidOptimizer(const geometry::TriangleMesh& mesh, option.image_boundary_margin_); for (int itr = 0; itr < option.maximum_iteration_; itr++) { utility::LogDebug("[Iteration {:04d}] ", itr + 1); - double residual = 0.0; - double residual_reg = 0.0; -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int c = 0; c < n_camera; c++) { - int nonrigidval = warping_fields[c].anchor_w_ * - warping_fields[c].anchor_h_ * 2; - double rr_reg = 0.0; - - Eigen::Matrix4d pose; - pose = opt_camera_trajectory.parameters_[c].extrinsic_; - - auto intrinsic = opt_camera_trajectory.parameters_[c] - .intrinsic_.intrinsic_matrix_; - auto extrinsic = opt_camera_trajectory.parameters_[c].extrinsic_; - Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); - intr.block<3, 3>(0, 0) = intrinsic; - intr(3, 3) = 1.0; - - auto f_lambda = [&](int i, Eigen::Vector14d& J_r, double& r, - Eigen::Vector14i& pattern) { - ComputeJacobianAndResidualNonRigid( - i, J_r, r, pattern, opt_mesh, proxy_intensity, - images_gray[c], images_dx[c], images_dy[c], - warping_fields[c], intr, extrinsic, - visibility_image_to_vertex[c], - option.image_boundary_margin_); - }; - Eigen::MatrixXd JTJ; - Eigen::VectorXd JTr; - double r2; - std::tie(JTJ, JTr, r2) = - ComputeJTJandJTrNonRigid( - f_lambda, int(visibility_image_to_vertex[c].size()), - nonrigidval, false); - - double weight = option.non_rigid_anchor_point_weight_ * - visibility_image_to_vertex[c].size() / n_vertex; - for (int j = 0; j < nonrigidval; j++) { - double r = weight * (warping_fields[c].flow_(j) - - warping_fields_init[c].flow_(j)); - JTJ(6 + j, 6 + j) += weight * weight; - JTr(6 + j) += weight * r; - rr_reg += r * r; - } + std::atomic residual = 0.0; + std::atomic residual_reg = 0.0; + tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + [&](const tbb::blocked_range& range) { + for (int c = range.begin(); c < range.end(); ++c) { + int nonrigidval = warping_fields[c].anchor_w_ * + warping_fields[c].anchor_h_ * 2; + double rr_reg = 0.0; - bool success; - Eigen::VectorXd result; - std::tie(success, result) = utility::SolveLinearSystemPSD( - JTJ, -JTr, /*prefer_sparse=*/false, - /*check_symmetric=*/false, - /*check_det=*/false, /*check_psd=*/false); - Eigen::Vector6d result_pose; - result_pose << result.block(0, 0, 6, 1); - auto delta = utility::TransformVector6dToMatrix4d(result_pose); - pose = delta * pose; - - for (int j = 0; j < nonrigidval; j++) { - warping_fields[c].flow_(j) += result(6 + j); - } - opt_camera_trajectory.parameters_[c].extrinsic_ = pose; + Eigen::Matrix4d pose; + pose = opt_camera_trajectory.parameters_[c].extrinsic_; + + auto intrinsic = opt_camera_trajectory.parameters_[c] + .intrinsic_.intrinsic_matrix_; + auto extrinsic = opt_camera_trajectory. + parameters_[c].extrinsic_; + Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); + intr.block<3, 3>(0, 0) = intrinsic; + intr(3, 3) = 1.0; + + auto f_lambda = [&](int i, Eigen::Vector14d& J_r, double& r, + Eigen::Vector14i& pattern) { + ComputeJacobianAndResidualNonRigid( + i, J_r, r, pattern, opt_mesh, proxy_intensity, + images_gray[c], images_dx[c], images_dy[c], + warping_fields[c], intr, extrinsic, + visibility_image_to_vertex[c], + option.image_boundary_margin_); + }; + Eigen::MatrixXd JTJ; + Eigen::VectorXd JTr; + double r2; + std::tie(JTJ, JTr, r2) = + ComputeJTJandJTrNonRigid( + f_lambda, int(visibility_image_to_vertex[c].size()), + nonrigidval, false); -#pragma omp critical(RunNonRigidOptimizer) - { - residual += r2; - residual_reg += rr_reg; + double weight = option.non_rigid_anchor_point_weight_ * + visibility_image_to_vertex[c].size() / n_vertex; + for (int j = 0; j < nonrigidval; j++) { + double r = weight * (warping_fields[c].flow_(j) - + warping_fields_init[c].flow_(j)); + JTJ(6 + j, 6 + j) += weight * weight; + JTr(6 + j) += weight * r; + rr_reg += r * r; + } + + bool success; + Eigen::VectorXd result; + std::tie(success, result) = utility::SolveLinearSystemPSD( + JTJ, -JTr, /*prefer_sparse=*/false, + /*check_symmetric=*/false, + /*check_det=*/false, /*check_psd=*/false); + Eigen::Vector6d result_pose; + result_pose << result.block(0, 0, 6, 1); + auto delta = utility::TransformVector6dToMatrix4d(result_pose); + pose = delta * pose; + + for (int j = 0; j < nonrigidval; j++) { + warping_fields[c].flow_(j) += result(6 + j); + } + opt_camera_trajectory.parameters_[c].extrinsic_ = pose; + + atomic_sum(residual, r2); + atomic_sum(residual_reg, rr_reg); } - } + }); utility::LogDebug("Residual error : {:.6f}, reg : {:.6f}", residual, residual_reg); SetProxyIntensityForVertex(opt_mesh, images_gray, warping_fields, diff --git a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp index b506d401a6f..f507e996098 100644 --- a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include "open3d/io/ImageIO.h" #include "open3d/io/PinholeCameraTrajectoryIO.h" #include "open3d/io/TriangleMeshIO.h" @@ -70,6 +72,15 @@ static void ComputeJacobianAndResidualRigid( w = 1.0; // Dummy. } +inline void atomic_sum(std::atomic& total, const double& val) { +#if defined(__cpp_lib_atomic_float) && __cpp_lib_atomic >= 201711L + total.fetch_add(val); +#else + double expected = total; + while(!total.compare_exchange_weak(expected, expected + val)); +#endif +} + std::pair RunRigidOptimizer(const geometry::TriangleMesh& mesh, const std::vector& images_rgbd, @@ -136,7 +147,7 @@ RunRigidOptimizer(const geometry::TriangleMesh& mesh, utility::LogDebug("[ColorMapOptimization] Rigid Optimization"); std::vector proxy_intensity; - int total_num_ = 0; + std::atomic total_num_ = 0; int n_camera = int(opt_camera_trajectory.parameters_.size()); SetProxyIntensityForVertex(opt_mesh, images_gray, utility::nullopt, opt_camera_trajectory, @@ -144,51 +155,48 @@ RunRigidOptimizer(const geometry::TriangleMesh& mesh, option.image_boundary_margin_); for (int itr = 0; itr < option.maximum_iteration_; itr++) { utility::LogDebug("[Iteration {:04d}] ", itr + 1); - double residual = 0.0; + std::atomic residual = 0.0; total_num_ = 0; -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int c = 0; c < n_camera; c++) { - Eigen::Matrix4d pose; - pose = opt_camera_trajectory.parameters_[c].extrinsic_; - - auto intrinsic = opt_camera_trajectory.parameters_[c] - .intrinsic_.intrinsic_matrix_; - auto extrinsic = opt_camera_trajectory.parameters_[c].extrinsic_; - Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); - intr.block<3, 3>(0, 0) = intrinsic; - intr(3, 3) = 1.0; - - auto f_lambda = [&](int i, Eigen::Vector6d& J_r, double& r, - double& w) { - w = 1.0; // Dummy. - ComputeJacobianAndResidualRigid( - i, J_r, r, w, opt_mesh, proxy_intensity, images_gray[c], - images_dx[c], images_dy[c], intr, extrinsic, - visibility_image_to_vertex[c], - option.image_boundary_margin_); - }; - Eigen::Matrix6d JTJ; - Eigen::Vector6d JTr; - double r2; - std::tie(JTJ, JTr, r2) = - utility::ComputeJTJandJTr( - f_lambda, int(visibility_image_to_vertex[c].size()), - false); - - bool is_success; - Eigen::Matrix4d delta; - std::tie(is_success, delta) = - utility::SolveJacobianSystemAndObtainExtrinsicMatrix(JTJ, - JTr); - pose = delta * pose; - opt_camera_trajectory.parameters_[c].extrinsic_ = pose; -#pragma omp critical(RunRigidOptimizer) - { - residual += r2; + tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + [&](const tbb::blocked_range& range) { + for (int c = range.begin(); c < range.end(); ++c) { + Eigen::Matrix4d pose; + pose = opt_camera_trajectory.parameters_[c].extrinsic_; + + auto intrinsic = opt_camera_trajectory. + parameters_[c].intrinsic_.intrinsic_matrix_; + auto extrinsic = opt_camera_trajectory. + parameters_[c].extrinsic_; + Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); + intr.block<3, 3>(0, 0) = intrinsic; + intr(3, 3) = 1.0; + + auto f_lambda = [&](int i, Eigen::Vector6d& J_r, + double& r, double& w) { + w = 1.0; // Dummy. + ComputeJacobianAndResidualRigid( + i, J_r, r, w, opt_mesh, proxy_intensity, + images_gray[c], images_dx[c], images_dy[c], intr, + extrinsic, visibility_image_to_vertex[c], + option.image_boundary_margin_); + }; + Eigen::Matrix6d JTJ; + Eigen::Vector6d JTr; + double r2; + std::tie(JTJ, JTr, r2) = utility::ComputeJTJandJTr< + Eigen::Matrix6d, Eigen::Vector6d>(f_lambda, + int(visibility_image_to_vertex[c].size()), false); + + Eigen::Matrix4d delta; + std::tie(std::ignore, delta) = + utility::SolveJacobianSystemAndObtainExtrinsicMatrix( + JTJ, JTr); + pose = delta * pose; + opt_camera_trajectory.parameters_[c].extrinsic_ = pose; + atomic_sum(residual, r2); total_num_ += int(visibility_image_to_vertex[c].size()); } - } + }); if (total_num_ > 0) { utility::LogDebug("Residual error : {:.6f} (avg : {:.6f})", residual, residual / total_num_); diff --git a/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp b/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp index 7cb4eba6f38..6698fc149d5 100644 --- a/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp +++ b/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp @@ -11,6 +11,10 @@ #include #include +#include +#include +#include + #include "open3d/geometry/VoxelGrid.h" #include "open3d/pipelines/integration/MarchingCubesConst.h" #include "open3d/utility/Helper.h" @@ -257,28 +261,28 @@ std::shared_ptr UniformTSDFVolume::ExtractVoxelGrid() voxel_grid->voxel_size_ = voxel_length_; voxel_grid->origin_ = origin_; -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - for (int z = 0; z < resolution_; z++) { - const int ind = IndexOf(x, y, z); - const float w = voxels_[ind].weight_; - const float f = voxels_[ind].tsdf_; - if (w != 0.0f && f < 0.98f && f >= -0.98f) { - double c = (f + 1.0) * 0.5; - Eigen::Vector3d color = Eigen::Vector3d(c, c, c); - Eigen::Vector3i index = Eigen::Vector3i(x, y, z); - voxel_grid->voxels_[index] = geometry::Voxel(index, color); + tbb::parallel_for(tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d& range){ + for (int x = range.pages().begin(); x < range.pages().end(); x++) { + for (int y = range.rows().begin(); y < range.rows().end(); y++) { + for (int z = range.cols().begin(); z < range.cols().end(); z++){ + const int ind = IndexOf(x, y, z); + const float w = voxels_[ind].weight_; + const float f = voxels_[ind].tsdf_; + if (w != 0.0f && f < 0.98f && f >= -0.98f) { + double c = (f + 1.0) * 0.5; + Eigen::Vector3d color = Eigen::Vector3d(c, c, c); + Eigen::Vector3i index = Eigen::Vector3i(x, y, z); + voxel_grid->voxels_[index] = + geometry::Voxel(index, color); + } } } } - } + }); return voxel_grid; } @@ -286,23 +290,22 @@ std::vector UniformTSDFVolume::ExtractVolumeTSDF() const { std::vector sharedvoxels_; sharedvoxels_.resize(voxel_num_); -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - for (int z = 0; z < resolution_; z++) { - const int ind = IndexOf(x, y, z); - const float f = voxels_[ind].tsdf_; - const float w = voxels_[ind].weight_; - sharedvoxels_[ind] = Eigen::Vector2d(f, w); + tbb::parallel_for(tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d& range){ + for (int x = range.pages().begin(); x < range.pages().end(); x++) { + for (int y = range.rows().begin(); y < range.rows().end(); y++) { + for (int z = range.cols().begin(); z < range.cols().end(); z++){ + const int ind = IndexOf(x, y, z); + const float f = voxels_[ind].tsdf_; + const float w = voxels_[ind].weight_; + sharedvoxels_[ind] = Eigen::Vector2d(f, w); + } } } - } + }); return sharedvoxels_; } @@ -310,61 +313,58 @@ std::vector UniformTSDFVolume::ExtractVolumeColor() const { std::vector sharedcolors_; sharedcolors_.resize(voxel_num_); -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - for (int z = 0; z < resolution_; z++) { - const int ind = IndexOf(x, y, z); - sharedcolors_[ind] = voxels_[ind].color_; + tbb::parallel_for(tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d& range){ + for (int x = range.pages().begin(); x < range.pages().end(); x++) { + for (int y = range.rows().begin(); y < range.rows().end(); y++) { + for (int z = range.cols().begin(); z < range.cols().end(); z++){ + const int ind = IndexOf(x, y, z); + sharedcolors_[ind] = voxels_[ind].color_; + } } } - } + }); return sharedcolors_; } void UniformTSDFVolume::InjectVolumeTSDF( const std::vector &sharedvoxels) { -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - for (int z = 0; z < resolution_; z++) { - const int ind = IndexOf(x, y, z); - voxels_[ind].tsdf_ = sharedvoxels[ind](0); - voxels_[ind].weight_ = sharedvoxels[ind](1); + tbb::parallel_for(tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d& range){ + for (int x = range.pages().begin(); x < range.pages().end(); x++) { + for (int y = range.rows().begin(); y < range.rows().end(); y++) { + for (int z = range.cols().begin(); z < range.cols().end(); z++){ + const int ind = IndexOf(x, y, z); + voxels_[ind].tsdf_ = sharedvoxels[ind](0); + voxels_[ind].weight_ = sharedvoxels[ind](1); + } } } - } + }); } void UniformTSDFVolume::InjectVolumeColor( const std::vector &sharedcolors) { -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - for (int z = 0; z < resolution_; z++) { - const int ind = IndexOf(x, y, z); - voxels_[ind].color_ = sharedcolors[ind]; + tbb::parallel_for(tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d& range){ + for (int x = range.pages().begin(); x < range.pages().end(); x++) { + for (int y = range.rows().begin(); y < range.rows().end(); y++) { + for (int z = range.cols().begin(); z < range.cols().end(); z++){ + const int ind = IndexOf(x, y, z); + voxels_[ind].color_ = sharedcolors[ind]; + } } } - } + }); } void UniformTSDFVolume::IntegrateWithDepthToCameraDistanceMultiplier( @@ -385,80 +385,75 @@ void UniformTSDFVolume::IntegrateWithDepthToCameraDistanceMultiplier( const float safe_width_f = intrinsic.width_ - 0.0001f; const float safe_height_f = intrinsic.height_ - 0.0001f; -#ifdef _WIN32 -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#else -#pragma omp parallel for collapse(2) schedule(static) \ - num_threads(utility::EstimateMaxThreads()) -#endif - for (int x = 0; x < resolution_; x++) { - for (int y = 0; y < resolution_; y++) { - Eigen::Vector4f pt_3d_homo(float(half_voxel_length_f + - voxel_length_f * x + origin_(0)), - float(half_voxel_length_f + - voxel_length_f * y + origin_(1)), - float(half_voxel_length_f + origin_(2)), - 1.f); - Eigen::Vector4f pt_camera = extrinsic_f * pt_3d_homo; - for (int z = 0; z < resolution_; z++, - pt_camera(0) += extrinsic_scaled_f(0, 2), - pt_camera(1) += extrinsic_scaled_f(1, 2), - pt_camera(2) += extrinsic_scaled_f(2, 2)) { - // Skip if negative depth after projection - if (pt_camera(2) <= 0) { - continue; - } - // Skip if x-y coordinate not in range - float u_f = pt_camera(0) * fx / pt_camera(2) + cx + 0.5f; - float v_f = pt_camera(1) * fy / pt_camera(2) + cy + 0.5f; - if (!(u_f >= 0.0001f && u_f < safe_width_f && v_f >= 0.0001f && - v_f < safe_height_f)) { - continue; - } - // Skip if negative depth in depth image - int u = (int)u_f; - int v = (int)v_f; - float d = *image.depth_.PointerAt(u, v); - if (d <= 0.0f) { - continue; - } + tbb::parallel_for(tbb::blocked_range2d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range2d& range) { + for (int x = range.rows().begin(); x < range.rows().end(); ++x) { + for (int y = range.cols().begin(); y < range.cols().end(); ++y) { + Eigen::Vector4f pt_3d_homo( + float(half_voxel_length_f + voxel_length_f * x + origin_(0)), + float(half_voxel_length_f + voxel_length_f * y + origin_(1)), + float(half_voxel_length_f + origin_(2)), 1.f); + Eigen::Vector4f pt_camera = extrinsic_f * pt_3d_homo; + for (int z = 0; z < resolution_; z++, + pt_camera(0) += extrinsic_scaled_f(0, 2), + pt_camera(1) += extrinsic_scaled_f(1, 2), + pt_camera(2) += extrinsic_scaled_f(2, 2)) { + // Skip if negative depth after projection + if (pt_camera(2) <= 0) { + continue; + } + // Skip if x-y coordinate not in range + float u_f = pt_camera(0) * fx / pt_camera(2) + cx + 0.5f; + float v_f = pt_camera(1) * fy / pt_camera(2) + cy + 0.5f; + if (!(u_f >= 0.0001f && u_f < safe_width_f && v_f >= 0.0001f && + v_f < safe_height_f)) { + continue; + } + // Skip if negative depth in depth image + int u = (int)u_f; + int v = (int)v_f; + float d = *image.depth_.PointerAt(u, v); + if (d <= 0.0f) { + continue; + } - int v_ind = IndexOf(x, y, z); - float sdf = - (d - pt_camera(2)) * - (*depth_to_camera_distance_multiplier.PointerAt( - u, v)); - if (sdf > -sdf_trunc_f) { - // integrate - float tsdf = std::min(1.0f, sdf * sdf_trunc_inv_f); - voxels_[v_ind].tsdf_ = - (voxels_[v_ind].tsdf_ * voxels_[v_ind].weight_ + - tsdf) / - (voxels_[v_ind].weight_ + 1.0f); - if (color_type_ == TSDFVolumeColorType::RGB8) { - const uint8_t *rgb = - image.color_.PointerAt(u, v, 0); - Eigen::Vector3d rgb_f(rgb[0], rgb[1], rgb[2]); - voxels_[v_ind].color_ = - (voxels_[v_ind].color_ * - voxels_[v_ind].weight_ + - rgb_f) / - (voxels_[v_ind].weight_ + 1.0f); - } else if (color_type_ == TSDFVolumeColorType::Gray32) { - const float *intensity = - image.color_.PointerAt(u, v, 0); - voxels_[v_ind].color_ = - (voxels_[v_ind].color_.array() * - voxels_[v_ind].weight_ + - (*intensity)) / + int v_ind = IndexOf(x, y, z); + float sdf = (d - pt_camera(2)) * + (*depth_to_camera_distance_multiplier + .PointerAt(u, v)); + if (sdf > -sdf_trunc_f) { + // integrate + float tsdf = std::min(1.0f, sdf * sdf_trunc_inv_f); + voxels_[v_ind].tsdf_ = + (voxels_[v_ind].tsdf_ * voxels_[v_ind].weight_ + + tsdf) / (voxels_[v_ind].weight_ + 1.0f); + if (color_type_ == TSDFVolumeColorType::RGB8) { + const uint8_t *rgb = + image.color_.PointerAt(u, v, 0); + Eigen::Vector3d rgb_f(rgb[0], rgb[1], rgb[2]); + voxels_[v_ind].color_ = + (voxels_[v_ind].color_ * + voxels_[v_ind].weight_ + + rgb_f) / + (voxels_[v_ind].weight_ + 1.0f); + } else if (color_type_ == TSDFVolumeColorType::Gray32) { + const float *intensity = + image.color_.PointerAt(u, v, 0); + voxels_[v_ind].color_ = + (voxels_[v_ind].color_.array() * + voxels_[v_ind].weight_ + + (*intensity)) / + (voxels_[v_ind].weight_ + 1.0f); + } + voxels_[v_ind].weight_ += 1.0f; } - voxels_[v_ind].weight_ += 1.0f; } } } - } + }); } Eigen::Vector3d UniformTSDFVolume::GetNormalAt(const Eigen::Vector3d &p) { diff --git a/cpp/open3d/pipelines/odometry/Odometry.cpp b/cpp/open3d/pipelines/odometry/Odometry.cpp index dd8b1569caa..d326c74f283 100644 --- a/cpp/open3d/pipelines/odometry/Odometry.cpp +++ b/cpp/open3d/pipelines/odometry/Odometry.cpp @@ -7,6 +7,9 @@ #include "open3d/pipelines/odometry/Odometry.h" +#include +#include + #include #include @@ -15,161 +18,164 @@ #include "open3d/pipelines/odometry/RGBDOdometryJacobian.h" #include "open3d/utility/Eigen.h" #include "open3d/utility/Timer.h" +#include "open3d/utility/Parallel.h" namespace open3d { namespace pipelines { namespace odometry { -static std::tuple InitializeCorrespondenceMap( - int width, int height) { - // initialization: filling with any (u,v) to (-1,-1) +struct CorrespondenceReduction { + // Global + const geometry::Image& depth_s; + const geometry::Image& depth_t; + const Eigen::Matrix3d& KRK_inv; + const Eigen::Vector3d& Kt; + const double depth_diff_max; + // Local geometry::Image correspondence_map; geometry::Image depth_buffer; - correspondence_map.Prepare(width, height, 2, 4); - depth_buffer.Prepare(width, height, 1, 4); - for (int v = 0; v < correspondence_map.height_; v++) { - for (int u = 0; u < correspondence_map.width_; u++) { - *correspondence_map.PointerAt(u, v, 0) = -1; - *correspondence_map.PointerAt(u, v, 1) = -1; - *depth_buffer.PointerAt(u, v, 0) = -1.0f; - } + + CorrespondenceReduction(const geometry::Image& depth_s_, + const geometry::Image& depth_t_, + const Eigen::Matrix3d& KRK_inv_, + const Eigen::Vector3d& Kt_, + const double depth_diff_max_) + : depth_s(depth_s_), depth_t(depth_t_), KRK_inv(KRK_inv_), + Kt(Kt_), depth_diff_max(depth_diff_max_) { + InitializeCorrespondenceMap(); } - return std::make_tuple(correspondence_map, depth_buffer); -} -static inline void AddElementToCorrespondenceMap( - geometry::Image &correspondence_map, - geometry::Image &depth_buffer, - int u_s, - int v_s, - int u_t, - int v_t, - float transformed_d_t) { - int exist_u_t, exist_v_t; - double exist_d_t; - exist_u_t = *correspondence_map.PointerAt(u_s, v_s, 0); - exist_v_t = *correspondence_map.PointerAt(u_s, v_s, 1); - if (exist_u_t != -1 && exist_v_t != -1) { - exist_d_t = *depth_buffer.PointerAt(u_s, v_s); - if (transformed_d_t < - exist_d_t) { // update nearer point as correspondence - *correspondence_map.PointerAt(u_s, v_s, 0) = u_t; - *correspondence_map.PointerAt(u_s, v_s, 1) = v_t; - *depth_buffer.PointerAt(u_s, v_s) = transformed_d_t; - } - } else { // register correspondence - *correspondence_map.PointerAt(u_s, v_s, 0) = u_t; - *correspondence_map.PointerAt(u_s, v_s, 1) = v_t; - *depth_buffer.PointerAt(u_s, v_s) = transformed_d_t; + CorrespondenceReduction(CorrespondenceReduction& o, tbb::split) + : depth_s(o.depth_s), depth_t(o.depth_t), KRK_inv(o.KRK_inv), + Kt(o.Kt), depth_diff_max(o.depth_diff_max) { + InitializeCorrespondenceMap(); + } + + void InitializeCorrespondenceMap() { + const int width = depth_t.width_; + const int height = depth_t.height_; + correspondence_map.Prepare(depth_t.width_, depth_t.height_, 2, 4); + depth_buffer.Prepare(depth_t.width_, depth_t.height_, 1, 4); + std::fill(correspondence_map.PointerAt(0, 0, 0), + correspondence_map.PointerAt(width, height, 1), -1); + std::fill(depth_buffer.PointerAt(0, 0, 0), + depth_buffer.PointerAt(width, height, 0), -1.0f); } -} -static void MergeCorrespondenceMaps(geometry::Image &correspondence_map, - geometry::Image &depth_buffer, - geometry::Image &correspondence_map_part, - geometry::Image &depth_buffer_part) { - for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { - for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { - int u_t = *correspondence_map_part.PointerAt(u_s, v_s, 0); - int v_t = *correspondence_map_part.PointerAt(u_s, v_s, 1); - if (u_t != -1 && v_t != -1) { - float transformed_d_t = - *depth_buffer_part.PointerAt(u_s, v_s); - AddElementToCorrespondenceMap(correspondence_map, depth_buffer, - u_s, v_s, u_t, v_t, - transformed_d_t); + void inline AddElementToCorrespondenceMap( + int u_s, int v_s, int u_t, int v_t, float transformed_d_t) { + int exist_u_t, exist_v_t; + double exist_d_t; + exist_u_t = *correspondence_map.PointerAt(u_s, v_s, 0); + exist_v_t = *correspondence_map.PointerAt(u_s, v_s, 1); + if (exist_u_t != -1 && exist_v_t != -1) { + exist_d_t = *depth_buffer.PointerAt(u_s, v_s); + if (transformed_d_t < exist_d_t) { + // update nearer point as correspondence + *correspondence_map.PointerAt(u_s, v_s, 0) = u_t; + *correspondence_map.PointerAt(u_s, v_s, 1) = v_t; + *depth_buffer.PointerAt(u_s, v_s) = transformed_d_t; } + } else { // register correspondence + *correspondence_map.PointerAt(u_s, v_s, 0) = u_t; + *correspondence_map.PointerAt(u_s, v_s, 1) = v_t; + *depth_buffer.PointerAt(u_s, v_s) = transformed_d_t; } } -} -static int CountCorrespondence(const geometry::Image &correspondence_map) { - int correspondence_count = 0; - for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { - for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { - int u_t = *correspondence_map.PointerAt(u_s, v_s, 0); - int v_t = *correspondence_map.PointerAt(u_s, v_s, 1); - if (u_t != -1 && v_t != -1) { - correspondence_count++; + void MergeCorrespondenceMaps(const CorrespondenceReduction& rhs) { + for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { + for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { + int u_t = *rhs.correspondence_map.PointerAt(u_s, v_s, 0); + int v_t = *rhs.correspondence_map.PointerAt(u_s, v_s, 1); + if (u_t != -1 && v_t != -1) { + float transformed_d_t = + *rhs.depth_buffer.PointerAt(u_s, v_s); + AddElementToCorrespondenceMap( + u_s, v_s, u_t, v_t, transformed_d_t); + } } } } - return correspondence_count; -} -CorrespondenceSetPixelWise ComputeCorrespondence( - const Eigen::Matrix3d &intrinsic_matrix, - const Eigen::Matrix4d &extrinsic, - const geometry::Image &depth_s, - const geometry::Image &depth_t, - const OdometryOption &option) { - const Eigen::Matrix3d K = intrinsic_matrix; - const Eigen::Matrix3d K_inv = K.inverse(); - const Eigen::Matrix3d R = extrinsic.block<3, 3>(0, 0); - const Eigen::Matrix3d KRK_inv = K * R * K_inv; - Eigen::Vector3d Kt = K * extrinsic.block<3, 1>(0, 3); - - geometry::Image correspondence_map; - geometry::Image depth_buffer; - std::tie(correspondence_map, depth_buffer) = - InitializeCorrespondenceMap(depth_t.width_, depth_t.height_); - -#pragma omp parallel - { - geometry::Image correspondence_map_private; - geometry::Image depth_buffer_private; - std::tie(correspondence_map_private, depth_buffer_private) = - InitializeCorrespondenceMap(depth_t.width_, depth_t.height_); -#pragma omp for nowait - for (int v_s = 0; v_s < depth_s.height_; v_s++) { - for (int u_s = 0; u_s < depth_s.width_; u_s++) { + void operator()(const tbb::blocked_range2d&r) { + for (int v_s = r.rows().begin(); v_s < r.rows().end(); ++v_s) { + for (int u_s = r.cols().begin(); u_s < r.cols().end(); u_s++) { double d_s = *depth_s.PointerAt(u_s, v_s); if (!std::isnan(d_s)) { Eigen::Vector3d uv_in_s = d_s * KRK_inv * Eigen::Vector3d(u_s, v_s, 1.0) + Kt; double transformed_d_s = uv_in_s(2); - int u_t = (int)(uv_in_s(0) / transformed_d_s + 0.5); - int v_t = (int)(uv_in_s(1) / transformed_d_s + 0.5); + int u_t = (int)std::round(uv_in_s(0) / transformed_d_s); + int v_t = (int)std::round(uv_in_s(1) / transformed_d_s); if (u_t >= 0 && u_t < depth_t.width_ && v_t >= 0 && v_t < depth_t.height_) { double d_t = *depth_t.PointerAt(u_t, v_t); if (!std::isnan(d_t) && std::abs(transformed_d_s - d_t) <= - option.depth_diff_max_) { + depth_diff_max) { AddElementToCorrespondenceMap( - correspondence_map_private, - depth_buffer_private, u_s, v_s, u_t, v_t, - (float)d_s); + u_s, v_s, u_t, v_t, (float)d_s); } } } } } -#pragma omp critical(ComputeCorrespondence) - { - MergeCorrespondenceMaps(correspondence_map, depth_buffer, - correspondence_map_private, - depth_buffer_private); - - } // omp critical - } // omp parallel - - CorrespondenceSetPixelWise correspondence; - int correspondence_count = CountCorrespondence(correspondence_map); - correspondence.resize(correspondence_count); - int cnt = 0; - for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { - for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { - int u_t = *correspondence_map.PointerAt(u_s, v_s, 0); - int v_t = *correspondence_map.PointerAt(u_s, v_s, 1); - if (u_t != -1 && v_t != -1) { - Eigen::Vector4i pixel_correspondence(u_s, v_s, u_t, v_t); - correspondence[cnt] = pixel_correspondence; - cnt++; + } + + inline void join(const CorrespondenceReduction& rhs) { + MergeCorrespondenceMaps(rhs); + } + + std::size_t CountCorrespondence() const { + std::size_t correspondence_count = 0; + for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { + for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { + int u_t = *correspondence_map.PointerAt(u_s, v_s, 0); + int v_t = *correspondence_map.PointerAt(u_s, v_s, 1); + if (u_t != -1 && v_t != -1) { + correspondence_count++; + } } } + return correspondence_count; } - return correspondence; + + CorrespondenceSetPixelWise correspondences() const { + CorrespondenceSetPixelWise out; + out.reserve(CountCorrespondence()); + for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { + for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { + int u_t = *correspondence_map.PointerAt(u_s, v_s, 0); + int v_t = *correspondence_map.PointerAt(u_s, v_s, 1); + if (u_t != -1 && v_t != -1) { + out.emplace_back(u_s, v_s, u_t, v_t); + } + } + } + return out; + } +}; + +CorrespondenceSetPixelWise ComputeCorrespondence( + const Eigen::Matrix3d &intrinsic_matrix, + const Eigen::Matrix4d &extrinsic, + const geometry::Image &depth_s, + const geometry::Image &depth_t, + const OdometryOption &option) { + const Eigen::Matrix3d K = intrinsic_matrix; + const Eigen::Matrix3d K_inv = K.inverse(); + const Eigen::Matrix3d R = extrinsic.block<3, 3>(0, 0); + const Eigen::Matrix3d KRK_inv = K * R * K_inv; + const Eigen::Vector3d Kt = K * extrinsic.block<3, 1>(0, 3); + + CorrespondenceReduction reducer(depth_s, depth_t, + KRK_inv, Kt, option.depth_diff_max_); + tbb::parallel_reduce(tbb::blocked_range2d( + 0, depth_s.height_, utility::DefaultGrainSizeTBB2D(), + 0, depth_s.width_, utility::DefaultGrainSizeTBB2D()), reducer); + + return reducer.correspondences(); } static std::shared_ptr ConvertDepthImageToXYZImage( @@ -215,6 +221,54 @@ static std::vector CreateCameraMatrixPyramid( return pyramid_camera_matrix; } +struct InformationMatrixReducer { + // Globals + const CorrespondenceSetPixelWise& corres; + const geometry::Image& xyz_t; + // Locals + Eigen::Matrix6d GTG; + + InformationMatrixReducer(const CorrespondenceSetPixelWise& corres_, + const geometry::Image& xyz_t_) + : corres(corres_), xyz_t(xyz_t_), GTG(Eigen::Matrix6d::Zero()) {} + + InformationMatrixReducer(InformationMatrixReducer& o, tbb::split) + : corres(o.corres), xyz_t(o.xyz_t), GTG(Eigen::Matrix6d::Zero()) {} + + void operator()(const tbb::blocked_range& range) { + // write q^* + // see http://redwood-data.org/indoor/registration.html + // note: I comes first in this implementation + Eigen::Vector6d G_r; + for (std::size_t i = range.begin(); i < range.end(); ++i) { + int u_t = corres[i](2); + int v_t = corres[i](3); + double x = *xyz_t.PointerAt(u_t, v_t, 0); + double y = *xyz_t.PointerAt(u_t, v_t, 1); + double z = *xyz_t.PointerAt(u_t, v_t, 2); + G_r.setZero(); + G_r(1) = z; + G_r(2) = -y; + G_r(3) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + G_r.setZero(); + G_r(0) = -z; + G_r(2) = x; + G_r(4) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + G_r.setZero(); + G_r(0) = y; + G_r(1) = -x; + G_r(5) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + } + } + + void join(InformationMatrixReducer& other) { + GTG += other.GTG; + } +}; + static Eigen::Matrix6d CreateInformationMatrix( const Eigen::Matrix4d &extrinsic, const camera::PinholeCameraIntrinsic &pinhole_camera_intrinsic, @@ -228,41 +282,10 @@ static Eigen::Matrix6d CreateInformationMatrix( auto xyz_t = ConvertDepthImageToXYZImage( depth_t, pinhole_camera_intrinsic.intrinsic_matrix_); - // write q^* - // see http://redwood-data.org/indoor/registration.html - // note: I comes first and q_skew is scaled by factor 2. - Eigen::Matrix6d GTG = Eigen::Matrix6d::Identity(); -#pragma omp parallel - { - Eigen::Matrix6d GTG_private = Eigen::Matrix6d::Identity(); - Eigen::Vector6d G_r_private = Eigen::Vector6d::Zero(); -#pragma omp for nowait - for (int row = 0; row < int(correspondence.size()); row++) { - int u_t = correspondence[row](2); - int v_t = correspondence[row](3); - double x = *xyz_t->PointerAt(u_t, v_t, 0); - double y = *xyz_t->PointerAt(u_t, v_t, 1); - double z = *xyz_t->PointerAt(u_t, v_t, 2); - G_r_private.setZero(); - G_r_private(1) = z; - G_r_private(2) = -y; - G_r_private(3) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - G_r_private.setZero(); - G_r_private(0) = -z; - G_r_private(2) = x; - G_r_private(4) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - G_r_private.setZero(); - G_r_private(0) = y; - G_r_private(1) = -x; - G_r_private(5) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - } -#pragma omp critical(CreateInformationMatrix) - { GTG += GTG_private; } - } - return GTG; + InformationMatrixReducer reducer(correspondence, *xyz_t.get()); + tbb::parallel_reduce(tbb::blocked_range(0, + correspondence.size(), utility::DefaultGrainSizeTBB()), reducer); + return std::move(reducer.GTG); } static void NormalizeIntensity( diff --git a/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp b/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp index f0e48b43afe..da9dab27a82 100644 --- a/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp +++ b/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp @@ -8,6 +8,8 @@ #include "open3d/pipelines/registration/FastGlobalRegistration.h" #include +#include +#include #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" @@ -15,6 +17,7 @@ #include "open3d/pipelines/registration/Registration.h" #include "open3d/utility/Logging.h" #include "open3d/utility/Random.h" +#include "open3d/utility/Parallel.h" namespace open3d { namespace pipelines { @@ -24,34 +27,44 @@ static std::vector> InitialMatching( const Feature& src_features, const Feature& dst_features) { geometry::KDTreeFlann src_feature_tree(src_features); geometry::KDTreeFlann dst_feature_tree(dst_features); - std::map corres_ij; + std::vector corres_ij(src_features.data_.cols(), -1); std::vector corres_ji(dst_features.data_.cols(), -1); -#pragma omp for nowait - for (int j = 0; j < dst_features.data_.cols(); j++) { - std::vector corres_tmp(1); - std::vector dist_tmp(1); - src_feature_tree.SearchKNN(Eigen::VectorXd(dst_features.data_.col(j)), - 1, corres_tmp, dist_tmp); - int i = corres_tmp[0]; - corres_ji[j] = i; - if (corres_ij.find(i) == corres_ij.end()) { - // set a temp value to prevent other threads recomputing - // corres_ij[i] until the following dst_feature_tree.SearchKNN() - // call completes. There is still a race condition but the result - // would be fine since both threads will compute the same result - corres_ij[i] = -1; - dst_feature_tree.SearchKNN( - Eigen::VectorXd(src_features.data_.col(i)), 1, corres_tmp, - dist_tmp); - corres_ij[i] = corres_tmp[0]; - } - } + tbb::parallel_invoke([&](){ + tbb::parallel_for(tbb::blocked_range(0, + src_features.data_.cols(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + dst_feature_tree.SearchKNN( + Eigen::VectorXd(src_features.data_.col(i)), + 1, corres_tmp, dist_tmp); + corres_ij[i] = corres_tmp[0]; + } + }); + }, [&](){ + tbb::parallel_for(tbb::blocked_range(0, + dst_features.data_.cols(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int j = range.begin(); j < range.end(); ++j) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + src_feature_tree.SearchKNN( + Eigen::VectorXd(dst_features.data_.col(j)), + 1, corres_tmp, dist_tmp); + corres_ji[j] = corres_tmp[0]; + } + }); + }); utility::LogDebug("\t[cross check] "); std::vector> corres_cross; - for (const std::pair& ij : corres_ij) { - if (corres_ji[ij.second] == ij.first) corres_cross.push_back(ij); + for (int i = 0; i < static_cast(corres_ij.size()); ++i) { + int j = corres_ij[i]; + if (corres_ji[j] == i) { + corres_cross.emplace_back(i, j); + } } utility::LogDebug("Initial matchings : {}", corres_cross.size()); return corres_cross; diff --git a/cpp/open3d/pipelines/registration/Feature.cpp b/cpp/open3d/pipelines/registration/Feature.cpp index 2815159dcc4..73580575b02 100644 --- a/cpp/open3d/pipelines/registration/Feature.cpp +++ b/cpp/open3d/pipelines/registration/Feature.cpp @@ -9,6 +9,8 @@ #include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Logging.h" @@ -58,36 +60,38 @@ static std::shared_ptr ComputeSPFHFeature( const geometry::KDTreeSearchParam &search_param) { auto feature = std::make_shared(); feature->Resize(33, (int)input.points_.size()); -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)input.points_.size(); i++) { - const auto &point = input.points_[i]; - const auto &normal = input.normals_[i]; - std::vector indices; - std::vector distance2; - if (kdtree.Search(point, search_param, indices, distance2) > 1) { - // only compute SPFH feature when a point has neighbors - double hist_incr = 100.0 / (double)(indices.size() - 1); - for (size_t k = 1; k < indices.size(); k++) { - // skip the point itself, compute histogram - auto pf = ComputePairFeatures(point, normal, - input.points_[indices[k]], - input.normals_[indices[k]]); - int h_index = (int)(floor(11 * (pf(0) + M_PI) / (2.0 * M_PI))); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index, i) += hist_incr; - h_index = (int)(floor(11 * (pf(1) + 1.0) * 0.5)); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index + 11, i) += hist_incr; - h_index = (int)(floor(11 * (pf(2) + 1.0) * 0.5)); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index + 22, i) += hist_incr; + tbb::parallel_for(tbb::blocked_range( + 0, input.points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto &point = input.points_[i]; + const auto &normal = input.normals_[i]; + std::vector indices; + std::vector distance2; + if (kdtree.Search(point, search_param, indices, distance2) > 1) { + // only compute SPFH feature when a point has neighbors + double hist_incr = 100.0 / (double)(indices.size() - 1); + for (size_t k = 1; k < indices.size(); k++) { + // skip the point itself, compute histogram + auto pf = ComputePairFeatures(point, normal, + input.points_[indices[k]], + input.normals_[indices[k]]); + int h_index = (int)(floor(11 * (pf(0) + M_PI) / (2.0 * M_PI))); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index, i) += hist_incr; + h_index = (int)(floor(11 * (pf(1) + 1.0) * 0.5)); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index + 11, i) += hist_incr; + h_index = (int)(floor(11 * (pf(2) + 1.0) * 0.5)); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index + 22, i) += hist_incr; + } } } - } + }); return feature; } @@ -105,37 +109,40 @@ std::shared_ptr ComputeFPFHFeature( if (spfh == nullptr) { utility::LogError("Internal error: SPFH feature is nullptr."); } -#pragma omp parallel for schedule(static) \ - num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)input.points_.size(); i++) { - const auto &point = input.points_[i]; - std::vector indices; - std::vector distance2; - if (kdtree.Search(point, search_param, indices, distance2) > 1) { - double sum[3] = {0.0, 0.0, 0.0}; - for (size_t k = 1; k < indices.size(); k++) { - // skip the point itself - double dist = distance2[k]; - if (dist == 0.0) continue; + + tbb::parallel_for(tbb::blocked_range( + 0, input.points_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto &point = input.points_[i]; + std::vector indices; + std::vector distance2; + if (kdtree.Search(point, search_param, indices, distance2) > 1) { + double sum[3] = {0.0, 0.0, 0.0}; + for (size_t k = 1; k < indices.size(); k++) { + // skip the point itself + double dist = distance2[k]; + if (dist == 0.0) continue; + for (int j = 0; j < 33; j++) { + double val = spfh->data_(j, indices[k]) / dist; + sum[j / 11] += val; + feature->data_(j, i) += val; + } + } + for (int j = 0; j < 3; j++) + if (sum[j] != 0.0) sum[j] = 100.0 / sum[j]; for (int j = 0; j < 33; j++) { - double val = spfh->data_(j, indices[k]) / dist; - sum[j / 11] += val; - feature->data_(j, i) += val; + feature->data_(j, i) *= sum[j / 11]; + // The commented line is the fpfh function in the paper. + // But according to PCL implementation, it is skipped. + // Our initial test shows that the full fpfh function in the + // paper seems to be better than PCL implementation. Further + // test required. + feature->data_(j, i) += spfh->data_(j, i); } } - for (int j = 0; j < 3; j++) - if (sum[j] != 0.0) sum[j] = 100.0 / sum[j]; - for (int j = 0; j < 33; j++) { - feature->data_(j, i) *= sum[j / 11]; - // The commented line is the fpfh function in the paper. - // But according to PCL implementation, it is skipped. - // Our initial test shows that the full fpfh function in the - // paper seems to be better than PCL implementation. Further - // test required. - feature->data_(j, i) += spfh->data_(j, i); - } } - } + }); return feature; } @@ -153,33 +160,33 @@ CorrespondenceSet CorrespondencesFromFeatures(const Feature &source_features, int(target_features.data_.cols())}; std::vector corres(num_searches); - const int kMaxThreads = utility::EstimateMaxThreads(); - const int kOuterThreads = std::min(kMaxThreads, num_searches); - const int kInnerThreads = std::max(kMaxThreads / num_searches, 1); - (void)kOuterThreads; // Avoids compiler warning if OpenMP is disabled - (void)kInnerThreads; -#pragma omp parallel for num_threads(kOuterThreads) - for (int k = 0; k < num_searches; ++k) { - geometry::KDTreeFlann kdtree(features[1 - k]); - - int num_pts_k = num_pts[k]; - corres[k] = CorrespondenceSet(num_pts_k); -#pragma omp parallel for num_threads(kInnerThreads) - for (int i = 0; i < num_pts_k; i++) { - std::vector corres_tmp(1); - std::vector dist_tmp(1); - - kdtree.SearchKNN(Eigen::VectorXd(features[k].get().data_.col(i)), 1, - corres_tmp, dist_tmp); - int j = corres_tmp[0]; - corres[k][i] = Eigen::Vector2i(i, j); + tbb::parallel_for(tbb::blocked_range(0, num_searches, 1), + [&](const tbb::blocked_range& range) { + for (int k = range.begin(); k < range.end(); ++k) { + geometry::KDTreeFlann kdtree(features[1 - k]); + + int num_pts_k = num_pts[k]; + corres[k] = CorrespondenceSet(num_pts_k); + tbb::parallel_for(tbb::blocked_range( + 0, num_pts_k, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + + kdtree.SearchKNN( + Eigen::VectorXd(features[k].get().data_.col(i)), + 1, corres_tmp, dist_tmp); + int j = corres_tmp[0]; + corres[k][i] = Eigen::Vector2i(i, j); + } + }); } - } + }); // corres[0]: corres_ij, corres[1]: corres_ji if (!mutual_filter) return corres[0]; - // should not use parallel for due to emplace back CorrespondenceSet corres_mutual; int num_src_pts = num_pts[0]; for (int i = 0; i < num_src_pts; ++i) { diff --git a/cpp/open3d/pipelines/registration/GeneralizedICP.cpp b/cpp/open3d/pipelines/registration/GeneralizedICP.cpp index c529fdcc639..99911fb0327 100644 --- a/cpp/open3d/pipelines/registration/GeneralizedICP.cpp +++ b/cpp/open3d/pipelines/registration/GeneralizedICP.cpp @@ -14,6 +14,7 @@ #include #include +#include #include "open3d/geometry/KDTreeSearchParam.h" #include "open3d/geometry/PointCloud.h" @@ -64,11 +65,16 @@ std::shared_ptr InitializePointCloudForGeneralizedICP( output->covariances_.resize(output->points_.size()); const Eigen::Matrix3d C = Eigen::Vector3d(epsilon, 1, 1).asDiagonal(); -#pragma omp parallel for schedule(static) - for (int i = 0; i < (int)output->normals_.size(); i++) { - const auto Rx = GetRotationFromE1ToX(output->normals_[i]); - output->covariances_[i] = Rx * C * Rx.transpose(); - } + + tbb::parallel_for(tbb::blocked_range( + 0, output->normals_.size(), 1), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto Rx = GetRotationFromE1ToX(output->normals_[i]); + output->covariances_[i] = Rx * C * Rx.transpose(); + } + }); + return output; } } // namespace diff --git a/cpp/open3d/pipelines/registration/Registration.cpp b/cpp/open3d/pipelines/registration/Registration.cpp index 6820d2f7c33..a418ee9038a 100644 --- a/cpp/open3d/pipelines/registration/Registration.cpp +++ b/cpp/open3d/pipelines/registration/Registration.cpp @@ -7,6 +7,9 @@ #include "open3d/pipelines/registration/Registration.h" +#include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/pipelines/registration/Feature.h" @@ -18,6 +21,50 @@ namespace open3d { namespace pipelines { namespace registration { +struct RegistrationReduction { + // Globals: + const geometry::KDTreeFlann& target_kdtree; + const geometry::PointCloud& source; + double max_distance; + // Locals: + CorrespondenceSet correspondences; + double error2; + + RegistrationReduction(const geometry::KDTreeFlann& target_kdtree_, + const geometry::PointCloud& source_, + double max_correspondence_distance_) + : target_kdtree(target_kdtree_), source(source_), + max_distance(max_correspondence_distance_), + correspondences(), error2(0.0) {} + + RegistrationReduction(RegistrationReduction& other, tbb::split) + : target_kdtree(other.target_kdtree), source(other.source), + max_distance(other.max_distance), correspondences(), error2(0.0) {} + + void operator()(const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(1); + std::vector dists(1); + const auto &point = source.points_[i]; + if (target_kdtree.SearchHybrid(point, + max_distance, 1, indices, dists) > 0) { + error2 += dists[0]; + correspondences.emplace_back(i, indices[0]); + } + } + } + + void join(RegistrationReduction& other) { + correspondences.insert(correspondences.end(), + other.correspondences.begin(), other.correspondences.end()); + error2 += other.error2; + } + + std::tuple as_tuple() && { + return {std::move(correspondences), error2}; + } +}; + static RegistrationResult GetRegistrationResultAndCorrespondences( const geometry::PointCloud &source, const geometry::PointCloud &target, @@ -29,33 +76,14 @@ static RegistrationResult GetRegistrationResultAndCorrespondences( return result; } - double error2 = 0.0; + RegistrationReduction reducer(target_kdtree, + source, max_correspondence_distance); + tbb::parallel_reduce(tbb::blocked_range( + 0, source.points_.size(), utility::DefaultGrainSizeTBB()), reducer); -#pragma omp parallel - { - double error2_private = 0.0; - CorrespondenceSet correspondence_set_private; -#pragma omp for nowait - for (int i = 0; i < (int)source.points_.size(); i++) { - std::vector indices(1); - std::vector dists(1); - const auto &point = source.points_[i]; - if (target_kdtree.SearchHybrid(point, max_correspondence_distance, - 1, indices, dists) > 0) { - error2_private += dists[0]; - correspondence_set_private.push_back( - Eigen::Vector2i(i, indices[0])); - } - } -#pragma omp critical(GetRegistrationResultAndCorrespondences) - { - for (int i = 0; i < (int)correspondence_set_private.size(); i++) { - result.correspondence_set_.push_back( - correspondence_set_private[i]); - } - error2 += error2_private; - } - } + double error2; + std::tie(result.correspondence_set_, error2) + = std::move(reducer).as_tuple(); if (result.correspondence_set_.empty()) { result.fitness_ = 0.0; @@ -167,39 +195,64 @@ RegistrationResult RegistrationICP( return result; } -RegistrationResult RegistrationRANSACBasedOnCorrespondence( - const geometry::PointCloud &source, - const geometry::PointCloud &target, - const CorrespondenceSet &corres, - double max_correspondence_distance, - const TransformationEstimation &estimation - /* = TransformationEstimationPointToPoint(false)*/, - int ransac_n /* = 3*/, - const std::vector> - &checkers /* = {}*/, - const RANSACConvergenceCriteria &criteria - /* = RANSACConvergenceCriteria()*/) { - if (ransac_n < 3 || (int)corres.size() < ransac_n || - max_correspondence_distance <= 0.0) { - return RegistrationResult(); - } +template +void atomic_min(std::atomic& min_val, const T& val) noexcept { + T prev_val = min_val; + while (prev_val > val && min_val.compare_exchange_weak(prev_val, val)); +} +struct RANSACCorrespondenceReduction { + // Globals + const geometry::PointCloud& source; + const geometry::PointCloud& target; + const CorrespondenceSet& corres; + const TransformationEstimation& estimation; + using CheckerType = std::vector>; + const CheckerType& checkers; + const geometry::KDTreeFlann& kdtree; + std::atomic& est_k_global; + std::atomic& total_validation; + using RandomIntGen = utility::random::UniformIntGenerator; + RandomIntGen& rand_gen; + // Constants + const double max_distance; + const int ransac_n; + const double log_confidence; + // Locals + CorrespondenceSet ransac_corres; RegistrationResult best_result; - geometry::KDTreeFlann kdtree(target); - int est_k_global = criteria.max_iteration_; - int total_validation = 0; - -#pragma omp parallel - { - CorrespondenceSet ransac_corres(ransac_n); - RegistrationResult best_result_local; - int est_k_local = criteria.max_iteration_; - utility::random::UniformIntGenerator rand_gen(0, - corres.size() - 1); - -#pragma omp for nowait - for (int itr = 0; itr < criteria.max_iteration_; itr++) { - if (itr < est_k_global) { + + + RANSACCorrespondenceReduction(const geometry::PointCloud& source_, + const geometry::PointCloud& target_, + const CorrespondenceSet& corres_, + const TransformationEstimation& estimation_, + const CheckerType& checkers_, + const geometry::KDTreeFlann& kdtree_, + std::atomic& est_k_global_, + std::atomic& total_validation_, + RandomIntGen& rand_gen_, + double max_dist_, + int ransac_n_, + double confidence) + : source(source_), target(target_), corres(corres_), + estimation(estimation_), checkers(checkers_), kdtree(kdtree_), + est_k_global(est_k_global_), total_validation(total_validation_), + rand_gen(rand_gen_), max_distance(max_dist_), ransac_n(ransac_n_), + log_confidence(std::log(1.0 - confidence)), ransac_corres(ransac_n) {} + + RANSACCorrespondenceReduction(RANSACCorrespondenceReduction& o, + tbb::split): source(o.source), target(o.target), corres(o.corres), + estimation(o.estimation), checkers(o.checkers), kdtree(o.kdtree), + est_k_global(o.est_k_global), total_validation(o.total_validation), + rand_gen(o.rand_gen), max_distance(o.max_distance), + ransac_n(o.ransac_n), log_confidence(o.log_confidence) {} + + void operator()(const tbb::blocked_range& range) { + int est_k_local = est_k_global; + for (int i = range.begin(); i < range.end(); ++i) { + if (i < est_k_global) { for (int j = 0; j < ransac_n; j++) { ransac_corres[j] = corres[rand_gen()]; } @@ -209,69 +262,83 @@ RegistrationResult RegistrationRANSACBasedOnCorrespondence( ransac_corres); // Check transformation: inexpensive - bool check = true; - for (const auto &checker : checkers) { - if (!checker.get().Check(source, target, ransac_corres, - transformation)) { - check = false; - break; - } + if (!std::all_of(checkers.begin(), checkers.end(), + [&](const auto& checker){ return checker.get().Check( + source, target, ransac_corres, transformation); })) { + continue; } - if (!check) continue; // Expensive validation geometry::PointCloud pcd = source; pcd.Transform(transformation); auto result = GetRegistrationResultAndCorrespondences( - pcd, target, kdtree, max_correspondence_distance, - transformation); + pcd, target, kdtree, max_distance, transformation); - if (result.IsBetterRANSACThan(best_result_local)) { - best_result_local = result; + if (result.IsBetterRANSACThan(best_result)) { + best_result = std::move(result); double corres_inlier_ratio = EvaluateInlierCorrespondenceRatio( - pcd, target, corres, - max_correspondence_distance, + pcd, target, corres, max_distance, transformation); - // Update exit condition if necessary - double est_k_local_d = - std::log(1.0 - criteria.confidence_) / - std::log(1.0 - - std::pow(corres_inlier_ratio, ransac_n)); - // This prevents having a negative number of iterations: - // est_k_local_d = -inf if corres_inlier_ratio = 0.0 - est_k_local_d = - est_k_local_d < 0 ? est_k_local : est_k_local_d; - est_k_local = - est_k_local_d < est_k_global - ? static_cast(std::ceil(est_k_local_d)) - : est_k_local; + // Update exit condition if necessary. + // If confidence is 1.0, then it is safely inf, we always + // consume all the iterations. + double est_k_local_d = log_confidence / + std::log(1.0 - std::pow(corres_inlier_ratio, ransac_n)); + if (est_k_local_d < 0) { + est_k_local_d = est_k_local; + } + if (est_k_local_d < est_k_global) { + est_k_local = std::ceil(est_k_local_d); + } utility::LogDebug( "Thread {:06d}: registration fitness={:.3f}, " - "corres inlier ratio={:.3f}, " - "Est. max k = {}", - itr, result.fitness_, corres_inlier_ratio, + "corres inlier ratio={:.3f}, Est. max k = {}", + i, best_result.fitness_, corres_inlier_ratio, est_k_local_d); } -#pragma omp critical - { - total_validation += 1; - if (est_k_local < est_k_global) { - est_k_global = est_k_local; - } - } - } // if - } // for loop - -#pragma omp critical(RegistrationRANSACBasedOnCorrespondence) - { - if (best_result_local.IsBetterRANSACThan(best_result)) { - best_result = best_result_local; + total_validation += 1; + atomic_min(est_k_global, est_k_local); } } } + + void join(RANSACCorrespondenceReduction& other) { + if (!best_result.IsBetterRANSACThan(other.best_result)) { + best_result = std::move(other.best_result); + } + } +}; + +RegistrationResult RegistrationRANSACBasedOnCorrespondence( + const geometry::PointCloud &source, + const geometry::PointCloud &target, + const CorrespondenceSet &corres, + double max_correspondence_distance, + const TransformationEstimation &estimation + /* = TransformationEstimationPointToPoint(false)*/, + int ransac_n /* = 3*/, + const std::vector> + &checkers /* = {}*/, + const RANSACConvergenceCriteria &criteria + /* = RANSACConvergenceCriteria()*/) { + if (ransac_n < 3 || (int)corres.size() < ransac_n || + max_correspondence_distance <= 0.0) { + return {}; + } + + geometry::KDTreeFlann kdtree(target); + std::atomic est_k_global = criteria.max_iteration_; + std::atomic total_validation = 0; + utility::random::UniformIntGenerator rand_gen(0, corres.size() - 1); + RANSACCorrespondenceReduction reducer(source, target, corres, estimation, + checkers, kdtree, est_k_global, total_validation, rand_gen, + max_correspondence_distance, ransac_n, criteria.confidence_); + tbb::parallel_reduce(tbb::blocked_range( + 0, criteria.max_iteration_, utility::DefaultGrainSizeTBB()), reducer); + auto best_result = std::move(reducer.best_result); utility::LogDebug( "RANSAC exits after {:d} validations. Best inlier ratio {:e}, " "RMSE {:e}", @@ -305,6 +372,53 @@ RegistrationResult RegistrationRANSACBasedOnFeatureMatching( ransac_n, checkers, criteria); } +struct InformationMatrixReducer { + // Globals + const CorrespondenceSet& corres; + const geometry::PointCloud& target; + // Locals + Eigen::Matrix6d GTG; + + InformationMatrixReducer(const CorrespondenceSet& corres_, + const geometry::PointCloud& target_) + : corres(corres_), target(target_), GTG(Eigen::Matrix6d::Zero()) {} + + InformationMatrixReducer(InformationMatrixReducer& o, tbb::split) + : corres(o.corres), target(o.target), GTG(Eigen::Matrix6d::Zero()) {} + + void operator()(const tbb::blocked_range& range) { + // write q^* + // see http://redwood-data.org/indoor/registration.html + // note: I comes first in this implementation + Eigen::Vector6d G_r; + for (std::size_t i = range.begin(); i < range.end(); ++i) { + int t = corres[i](1); + double x = target.points_[t](0); + double y = target.points_[t](1); + double z = target.points_[t](2); + G_r.setZero(); + G_r(1) = z; + G_r(2) = -y; + G_r(3) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + G_r.setZero(); + G_r(0) = -z; + G_r(2) = x; + G_r(4) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + G_r.setZero(); + G_r(0) = y; + G_r(1) = -x; + G_r(5) = 1.0; + GTG.noalias() += G_r * G_r.transpose(); + } + } + + void join(InformationMatrixReducer& other) { + GTG += other.GTG; + } +}; + Eigen::Matrix6d GetInformationMatrixFromPointClouds( const geometry::PointCloud &source, const geometry::PointCloud &target, @@ -320,40 +434,13 @@ Eigen::Matrix6d GetInformationMatrixFromPointClouds( pcd, target, target_kdtree, max_correspondence_distance, transformation); - // write q^* - // see http://redwood-data.org/indoor/registration.html - // note: I comes first in this implementation - Eigen::Matrix6d GTG = Eigen::Matrix6d::Zero(); -#pragma omp parallel - { - Eigen::Matrix6d GTG_private = Eigen::Matrix6d::Zero(); - Eigen::Vector6d G_r_private = Eigen::Vector6d::Zero(); -#pragma omp for nowait - for (int c = 0; c < int(result.correspondence_set_.size()); c++) { - int t = result.correspondence_set_[c](1); - double x = target.points_[t](0); - double y = target.points_[t](1); - double z = target.points_[t](2); - G_r_private.setZero(); - G_r_private(1) = z; - G_r_private(2) = -y; - G_r_private(3) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - G_r_private.setZero(); - G_r_private(0) = -z; - G_r_private(2) = x; - G_r_private(4) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - G_r_private.setZero(); - G_r_private(0) = y; - G_r_private(1) = -x; - G_r_private(5) = 1.0; - GTG_private.noalias() += G_r_private * G_r_private.transpose(); - } -#pragma omp critical(GetInformationMatrixFromPointClouds) - { GTG += GTG_private; } - } - return GTG; + InformationMatrixReducer reducer(result.correspondence_set_, target); + tbb::parallel_reduce( + tbb::blocked_range(0, + result.correspondence_set_.size(), + utility::DefaultGrainSizeTBB()), + reducer); + return std::move(reducer.GTG); } } // namespace registration diff --git a/cpp/open3d/pipelines/registration/Registration.h b/cpp/open3d/pipelines/registration/Registration.h index 276ef6b64e2..6eeabfdf1c7 100644 --- a/cpp/open3d/pipelines/registration/Registration.h +++ b/cpp/open3d/pipelines/registration/Registration.h @@ -109,6 +109,11 @@ class RegistrationResult { inlier_rmse_ < other.inlier_rmse_); } + RegistrationResult(const RegistrationResult&) = default; + RegistrationResult(RegistrationResult&&) = default; + RegistrationResult& operator=(const RegistrationResult&) = default; + RegistrationResult& operator=(RegistrationResult&&) = default; + public: /// The estimated transformation matrix. Eigen::Matrix4d_u transformation_; diff --git a/cpp/open3d/t/geometry/PointCloud.cpp b/cpp/open3d/t/geometry/PointCloud.cpp index b345e0ea985..df3ad04fcaa 100644 --- a/cpp/open3d/t/geometry/PointCloud.cpp +++ b/cpp/open3d/t/geometry/PointCloud.cpp @@ -369,9 +369,9 @@ PointCloud PointCloud::RandomDownSample(double sampling_ratio) const { std::vector indices(length); std::iota(std::begin(indices), std::end(indices), 0); { - std::lock_guard lock(*utility::random::GetMutex()); + tbb::spin_mutex::scoped_lock lock(utility::random::GetMutex()); std::shuffle(indices.begin(), indices.end(), - *utility::random::GetEngine()); + utility::random::GetEngine()); } const int sample_size = sampling_ratio * length; diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp b/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp index 2fdc949fda3..9fea392807d 100644 --- a/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp +++ b/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp @@ -7,6 +7,9 @@ #include "open3d/t/geometry/kernel/PointCloudImpl.h" +#include +#include + namespace open3d { namespace t { namespace geometry { @@ -26,8 +29,6 @@ void ProjectCPU( float depth_max) { const bool has_colors = image_colors.has_value(); - int64_t n = points.GetLength(); - const float* points_ptr = points.GetDataPtr(); const float* point_colors_ptr = has_colors ? colors.value().get().GetDataPtr() : nullptr; @@ -40,7 +41,9 @@ void ProjectCPU( color_indexer = NDArrayIndexer(image_colors.value().get(), 2); } - core::ParallelFor(core::Device("CPU:0"), n, [&](int64_t workload_idx) { + tbb::spin_mutex mtx; + core::ParallelFor(core::Device("CPU:0"), points.GetLength(), + [&](int64_t workload_idx) { float x = points_ptr[3 * workload_idx + 0]; float y = points_ptr[3 * workload_idx + 1]; float z = points_ptr[3 * workload_idx + 2]; @@ -60,10 +63,8 @@ void ProjectCPU( float* depth_ptr = depth_indexer.GetDataPtr( static_cast(u), static_cast(v)); float d = zc * depth_scale; - // TODO: this can be wrong if ParallelFor is not implemented with - // OpenMP. -#pragma omp critical(ProjectCPU) { + tbb::spin_mutex::scoped_lock lock(mtx); if (*depth_ptr == 0 || *depth_ptr >= d) { *depth_ptr = d; diff --git a/cpp/open3d/t/geometry/kernel/UVUnwrapping.cpp b/cpp/open3d/t/geometry/kernel/UVUnwrapping.cpp index 866e620d6bf..e3e23ab4813 100644 --- a/cpp/open3d/t/geometry/kernel/UVUnwrapping.cpp +++ b/cpp/open3d/t/geometry/kernel/UVUnwrapping.cpp @@ -173,6 +173,7 @@ std::tuple ComputeUVAtlas(TriangleMesh& mesh, mesh_partitions.emplace_back(mesh_tmp); } + utility::LogDebug("Partitioning"); std::vector uvatlas_partitions(parallel_partitions); // By default UVAtlas uses a fast mode if there are more than 25k faces. @@ -202,6 +203,7 @@ std::tuple ComputeUVAtlas(TriangleMesh& mesh, LoopFn(tbb::blocked_range(0, parallel_partitions)); } + utility::LogDebug("Combining partitions"); // merge outputs for the packing algorithm UVAtlasPartitionOutput& combined_output = uvatlas_partitions.front(); for (int i = 1; i < parallel_partitions; ++i) { @@ -246,6 +248,7 @@ std::tuple ComputeUVAtlas(TriangleMesh& mesh, output = UVAtlasPartitionOutput(); } + utility::LogDebug("Packing UVAtlas"); HRESULT hr = UVAtlasPack(combined_output.vb, combined_output.ib, DXGI_FORMAT_R32_UINT, width, height, gutter, combined_output.partition_result_adjacency, diff --git a/cpp/open3d/t/geometry/kernel/VoxelBlockGridImpl.h b/cpp/open3d/t/geometry/kernel/VoxelBlockGridImpl.h index dd0fb9dc148..35f78c1539d 100644 --- a/cpp/open3d/t/geometry/kernel/VoxelBlockGridImpl.h +++ b/cpp/open3d/t/geometry/kernel/VoxelBlockGridImpl.h @@ -8,6 +8,10 @@ #include #include +#if !defined(__CUDACC__) +#include +#endif + #include "open3d/core/Dispatch.h" #include "open3d/core/Dtype.h" #include "open3d/core/MemoryManager.h" @@ -346,7 +350,7 @@ void EstimateRangeCPU std::atomic* count_ptr = &count_atomic; #endif -#ifndef __CUDACC__ +#if !defined(__CUDACC__) using std::max; using std::min; #endif @@ -463,10 +467,17 @@ void EstimateRangeCPU range_ptr[1] = depth_min; }); +#if !defined(__CUDACC__) + tbb::spin_mutex estimate_range_mutex; + tbb::profiling::set_name(estimate_range_mutex, "EstimateRangeCPU"); +#define LOCAL_LAMBDA_CAPTURE =, &estimate_range_mutex +#else +#defined LOCAL_LAMBDA_CAPTURE = +#endif // Pass 1: iterate over rendering fragment array, fill-in range core::ParallelFor( block_keys.GetDevice(), frag_count * fragment_size * fragment_size, - [=] OPEN3D_DEVICE(int64_t workload_idx) { + [LOCAL_LAMBDA_CAPTURE] OPEN3D_DEVICE(int64_t workload_idx) { int frag_idx = workload_idx / (fragment_size * fragment_size); int local_idx = workload_idx % (fragment_size * fragment_size); int dv = local_idx / fragment_size; @@ -486,12 +497,12 @@ void EstimateRangeCPU float z_min = frag_ptr[0]; float z_max = frag_ptr[1]; float* range_ptr = range_map_indexer.GetDataPtr(u, v); -#ifdef __CUDACC__ +#if defined(__CUDACC__) atomicMinf(&(range_ptr[0]), z_min); atomicMaxf(&(range_ptr[1]), z_max); #else -#pragma omp critical(EstimateRangeCPU) { + tbb::spin_mutex::scoped_lock lock(estimate_range_mutex); range_ptr[0] = min(z_min, range_ptr[0]); range_ptr[1] = max(z_max, range_ptr[1]); } @@ -501,6 +512,7 @@ void EstimateRangeCPU #if defined(__CUDACC__) core::cuda::Synchronize(); #endif +#undef LOCAL_LAMBDA_CAPTURE if (needed_frag_count != frag_count) { utility::LogInfo("Reallocating {} fragments for EstimateRange (was {})", diff --git a/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp b/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp index 4115a42e12e..e135caea14d 100644 --- a/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp +++ b/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp @@ -9,6 +9,8 @@ #include +#include + #include "open3d/io/IJsonConvertibleIO.h" #include "open3d/io/ImageIO.h" #include "open3d/t/io/sensor/realsense/RSBagReader.h" @@ -53,25 +55,20 @@ void RGBDVideoReader::SaveFrames(const std::string &frame_path, open3d::geometry::Image im_color, im_depth; for (auto tim_rgbd = NextFrame(); !IsEOF() && GetTimestamp() < end_time; ++idx, tim_rgbd = NextFrame()) -#pragma omp parallel sections - { -#pragma omp section - { + tbb::parallel_invoke([&](){ im_color = tim_rgbd.color_.ToLegacy(); auto color_file = fmt::format("{0}/color/{1:05d}.jpg", frame_path, idx); open3d::io::WriteImage(color_file, im_color); utility::LogDebug("Written color image to {}", color_file); - } -#pragma omp section - { + }, [&](){ im_depth = tim_rgbd.depth_.ToLegacy(); auto depth_file = fmt::format("{0}/depth/{1:05d}.png", frame_path, idx); open3d::io::WriteImage(depth_file, im_depth); utility::LogDebug("Written depth image to {}", depth_file); - } - } + }); + utility::LogInfo("Written {} depth and color images to {}/{{depth,color}}/", idx, frame_path); } diff --git a/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h b/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h index b541082e2d2..23f3fced488 100644 --- a/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h +++ b/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h @@ -5,6 +5,10 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#if !defined(__CUDACC__) +#include +#endif + #include "open3d/core/linalg/kernel/SVD3x3.h" #include "open3d/t/geometry/kernel/GeometryIndexer.h" #include "open3d/t/pipelines/kernel/FillInLinearSystem.h" @@ -49,8 +53,18 @@ void FillInRigidAlignmentTermCPU const float *Ri_normal_ps_ptr = static_cast(Ri_normal_ps.GetDataPtr()); +#if !defined(__CUDACC__) + tbb::spin_mutex fill_alignment_mutex; + tbb::profiling::set_name(fill_alignment_mutex, + "FillInRigidAlignmentTermCPU"); +#define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex +#else +#defined LOCAL_LAMBDA_CAPTURE = +#endif + core::ParallelFor( - AtA.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) { + AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] + OPEN3D_DEVICE(int64_t workload_idx) { const float *p_prime = Ti_ps_ptr + 3 * workload_idx; const float *q_prime = Tj_qs_ptr + 3 * workload_idx; const float *normal_p_prime = @@ -86,8 +100,8 @@ void FillInRigidAlignmentTermCPU } atomicAdd(residual_ptr, r * r); #else -#pragma omp critical(FillInRigidAlignmentTermCPU) { + tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); for (int i_local = 0; i_local < 12; ++i_local) { for (int j_local = 0; j_local < 12; ++j_local) { AtA_local_ptr[i_local * 12 + j_local] @@ -98,7 +112,8 @@ void FillInRigidAlignmentTermCPU *residual_ptr += r * r; } #endif - }); + }); +#undef LOCAL_LAMBDA_CAPTURE // Then fill-in the large linear system std::vector indices_vec(12); @@ -182,8 +197,18 @@ void FillInSLACAlignmentTermCPU const float *cgrid_ratio_qs_ptr = static_cast(cgrid_ratio_qs.GetDataPtr()); +#if !defined(__CUDACC__) + tbb::spin_mutex fill_alignment_mutex; + tbb::profiling::set_name(fill_alignment_mutex, + "FillInSLACAlignmentTermCPU"); +#define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex +#else +#defined LOCAL_LAMBDA_CAPTURE = +#endif + core::ParallelFor( - AtA.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) { + AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] + OPEN3D_DEVICE(int64_t workload_idx) { const float *Ti_Cp = Ti_Cps_ptr + 3 * workload_idx; const float *Tj_Cq = Tj_Cqs_ptr + 3 * workload_idx; const float *Cnormal_p = Cnormal_ps_ptr + 3 * workload_idx; @@ -259,8 +284,8 @@ void FillInSLACAlignmentTermCPU } atomicAdd(residual_ptr, r * r); #else -#pragma omp critical(FillInSLACAlignmentTermCPU) { + tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); for (int ki = 0; ki < 60; ++ki) { for (int kj = 0; kj < 60; ++kj) { AtA_ptr[idx[ki] * n_vars + idx[kj]] @@ -271,7 +296,8 @@ void FillInSLACAlignmentTermCPU *residual_ptr += r * r; } #endif - }); + }); +#undef LOCAL_LAMBDA_CAPTURE } #if defined(__CUDACC__) @@ -309,8 +335,17 @@ void FillInSLACRegularizerTermCPU const float *positions_curr_ptr = static_cast(positions_curr.GetDataPtr()); - core::ParallelFor( - AtA.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) { +#if !defined(__CUDACC__) + tbb::spin_mutex fill_alignment_mutex; + tbb::profiling::set_name(fill_alignment_mutex, + "FillInSLACRegularizerTermCPU"); +#define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex +#else +#defined LOCAL_LAMBDA_CAPTURE = +#endif + + core::ParallelFor(AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] + OPEN3D_DEVICE(int64_t workload_idx) { // Enumerate 6 neighbors int idx_i = grid_idx_ptr[workload_idx]; @@ -442,8 +477,8 @@ void FillInSLACRegularizerTermCPU -weight * local_r[axis]); } #else -#pragma omp critical(FillInSLACRegularizerTermCPU) { + tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); // Update residual *residual_ptr += weight * (local_r[0] * local_r[0] + local_r[1] * local_r[1] + @@ -469,7 +504,8 @@ void FillInSLACRegularizerTermCPU #endif } } - }); + }); +#undef LOCAL_LAMBDA_CAPTURE } } // namespace kernel } // namespace pipelines diff --git a/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp b/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp index e140c455e96..8ce0634653d 100644 --- a/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp +++ b/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp @@ -45,52 +45,44 @@ void ComputeOdometryInformationMatrixCPU(const core::Tensor& source_vertex_map, std::vector A_1x21(21, 0.0); -#ifdef _MSC_VER std::vector zeros_21(21, 0.0); A_1x21 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_21, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { -#else - float* A_reduction = A_1x21.data(); -#pragma omp parallel for reduction(+ : A_reduction[:21]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; workload_idx++) { -#endif - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_x[6], J_y[6], J_z[6]; - float rx, ry, rz; - - bool valid = GetJacobianPointToPoint( - x, y, square_dist_thr, source_vertex_indexer, - target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry, - rz); - - if (valid) { - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_x[j] * J_x[k]; - A_reduction[i] += J_y[j] * J_y[k]; - A_reduction[i] += J_z[j] * J_z[k]; - i++; - } - } + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_x[6], J_y[6], J_z[6]; + float rx, ry, rz; + + bool valid = GetJacobianPointToPoint( + x, y, square_dist_thr, source_vertex_indexer, + target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry, + rz); + + if (valid) { + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_x[j] * J_x[k]; + A_reduction[i] += J_y[j] * J_y[k]; + A_reduction[i] += J_z[j] * J_z[k]; + i++; } } -#ifdef _MSC_VER - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(21); - for (int j = 0; j < 21; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(21); + for (int j = 0; j < 21; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x21, {21}, core::Float32, device); float* reduction_ptr = A_reduction_tensor.GetDataPtr(); @@ -145,59 +137,52 @@ void ComputeOdometryResultIntensityCPU( std::vector A_1x29(29, 0.0); -#ifdef _MSC_VER std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { -#else - float* A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; workload_idx++) { -#endif - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_I[6]; - float r_I; - - bool valid = GetJacobianIntensity( - x, y, depth_outlier_trunc, source_depth_indexer, - target_depth_indexer, source_intensity_indexer, - target_intensity_indexer, - target_intensity_dx_indexer, - target_intensity_dy_indexer, source_vertex_indexer, - ti, J_I, r_I); - - if (valid) { - float d_huber = HuberDeriv(r_I, intensity_huber_delta); - float r_huber = HuberLoss(r_I, intensity_huber_delta); - - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_I[j] * J_I[k]; - i++; - } - A_reduction[21 + j] += J_I[j] * d_huber; - } - A_reduction[27] += r_huber; - A_reduction[28] += 1; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_I[6]; + float r_I; + + bool valid = GetJacobianIntensity( + x, y, depth_outlier_trunc, source_depth_indexer, + target_depth_indexer, source_intensity_indexer, + target_intensity_indexer, + target_intensity_dx_indexer, + target_intensity_dy_indexer, source_vertex_indexer, + ti, J_I, r_I); + + if (valid) { + float d_huber = HuberDeriv(r_I, intensity_huber_delta); + float r_huber = HuberLoss(r_I, intensity_huber_delta); + + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_I[j] * J_I[k]; + i++; } + A_reduction[21 + j] += J_I[j] * d_huber; } -#ifdef _MSC_VER - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + A_reduction[27] += r_huber; + A_reduction[28] += 1; + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); + core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); } @@ -245,66 +230,58 @@ void ComputeOdometryResultHybridCPU(const core::Tensor& source_depth, std::vector A_1x29(29, 0.0); -#ifdef _MSC_VER std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { -#else - float* A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; workload_idx++) { -#endif - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_I[6], J_D[6]; - float r_I, r_D; - - bool valid = GetJacobianHybrid( - x, y, depth_outlier_trunc, source_depth_indexer, - target_depth_indexer, source_intensity_indexer, - target_intensity_indexer, target_depth_dx_indexer, - target_depth_dy_indexer, - target_intensity_dx_indexer, - target_intensity_dy_indexer, source_vertex_indexer, - ti, J_I, J_D, r_I, r_D); - - if (valid) { - float d_huber_I = - HuberDeriv(r_I, intensity_huber_delta); - float d_huber_D = HuberDeriv(r_D, depth_huber_delta); - - float r_huber_I = HuberLoss(r_I, intensity_huber_delta); - float r_huber_D = HuberLoss(r_D, depth_huber_delta); - - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += - J_I[j] * J_I[k] + J_D[j] * J_D[k]; - i++; - } - A_reduction[21 + j] += - J_I[j] * d_huber_I + J_D[j] * d_huber_D; - } - A_reduction[27] += r_huber_I + r_huber_D; - A_reduction[28] += 1; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_I[6], J_D[6]; + float r_I, r_D; + + bool valid = GetJacobianHybrid( + x, y, depth_outlier_trunc, source_depth_indexer, + target_depth_indexer, source_intensity_indexer, + target_intensity_indexer, target_depth_dx_indexer, + target_depth_dy_indexer, + target_intensity_dx_indexer, + target_intensity_dy_indexer, source_vertex_indexer, + ti, J_I, J_D, r_I, r_D); + + if (valid) { + float d_huber_I = + HuberDeriv(r_I, intensity_huber_delta); + float d_huber_D = HuberDeriv(r_D, depth_huber_delta); + + float r_huber_I = HuberLoss(r_I, intensity_huber_delta); + float r_huber_D = HuberLoss(r_D, depth_huber_delta); + + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += + J_I[j] * J_I[k] + J_D[j] * J_D[k]; + i++; } + A_reduction[21 + j] += + J_I[j] * d_huber_I + J_D[j] * d_huber_D; } -#ifdef _MSC_VER - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + A_reduction[27] += r_huber_I + r_huber_D; + A_reduction[28] += 1; + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); } @@ -337,55 +314,48 @@ void ComputeOdometryResultPointToPlaneCPU( std::vector A_1x29(29, 0.0); -#ifdef _MSC_VER std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { -#else - float* A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; workload_idx++) { -#endif - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_ij[6]; - float r; - - bool valid = GetJacobianPointToPlane( - x, y, depth_outlier_trunc, source_vertex_indexer, - target_vertex_indexer, target_normal_indexer, ti, - J_ij, r); - - if (valid) { - float d_huber = HuberDeriv(r, depth_huber_delta); - float r_huber = HuberLoss(r, depth_huber_delta); - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_ij[j] * J_ij[k]; - i++; - } - A_reduction[21 + j] += J_ij[j] * d_huber; - } - A_reduction[27] += r_huber; - A_reduction[28] += 1; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_ij[6]; + float r; + + bool valid = GetJacobianPointToPlane( + x, y, depth_outlier_trunc, source_vertex_indexer, + target_vertex_indexer, target_normal_indexer, ti, + J_ij, r); + + if (valid) { + float d_huber = HuberDeriv(r, depth_huber_delta); + float r_huber = HuberLoss(r, depth_huber_delta); + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_ij[j] * J_ij[k]; + i++; } + A_reduction[21 + j] += J_ij[j] * d_huber; } -#ifdef _MSC_VER - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + A_reduction[27] += r_huber; + A_reduction[28] += 1; + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); + core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); } diff --git a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp index d2ea8b584ab..ba3b440c932 100644 --- a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp +++ b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp @@ -41,54 +41,47 @@ static void ComputePosePointToPlaneKernelCPU( // and 28th as inlier_count. std::vector A_1x29(29, 0.0); -#ifdef _WIN32 std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, - [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - ++workload_idx) { -#else - scalar_t *A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; ++workload_idx) { -#endif - scalar_t J_ij[6]; - scalar_t r = 0; - - bool valid = kernel::GetJacobianPointToPlane( - workload_idx, source_points_ptr, target_points_ptr, - target_normals_ptr, correspondence_indices, J_ij, - r); - - scalar_t w = GetWeightFromRobustKernel(r); - - if (valid) { - // Dump J, r into JtJ and Jtr - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_ij[j] * w * J_ij[k]; - ++i; - } - A_reduction[21 + j] += J_ij[j] * w * r; - } - A_reduction[27] += r; - A_reduction[28] += 1; + [&](const tbb::blocked_range& range, + std::vector A_reduction) { + for (int workload_idx = range.begin(); workload_idx < range.end(); + ++workload_idx) { + scalar_t J_ij[6]; + scalar_t r = 0; + + bool valid = kernel::GetJacobianPointToPlane( + workload_idx, source_points_ptr, target_points_ptr, + target_normals_ptr, correspondence_indices, J_ij, + r); + + scalar_t w = GetWeightFromRobustKernel(r); + + if (valid) { + // Dump J, r into JtJ and Jtr + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_ij[j] * w * J_ij[k]; + ++i; } + A_reduction[21 + j] += J_ij[j] * w * r; } -#ifdef _WIN32 - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + A_reduction[27] += r; + A_reduction[28] += 1; + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 29; ++i) { global_sum[i] = A_1x29[i]; @@ -147,59 +140,52 @@ static void ComputePoseColoredICPKernelCPU( // and 28th as inlier_count. std::vector A_1x29(29, 0.0); -#ifdef _WIN32 std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, - [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - ++workload_idx) { -#else - scalar_t *A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; ++workload_idx) { -#endif - scalar_t J_G[6] = {0}, J_I[6] = {0}; - scalar_t r_G = 0, r_I = 0; - - bool valid = GetJacobianColoredICP( - workload_idx, source_points_ptr, source_colors_ptr, - target_points_ptr, target_normals_ptr, - target_colors_ptr, target_color_gradients_ptr, - correspondence_indices, sqrt_lambda_geometric, - sqrt_lambda_photometric, J_G, J_I, r_G, r_I); - - scalar_t w_G = GetWeightFromRobustKernel(r_G); - scalar_t w_I = GetWeightFromRobustKernel(r_I); - - if (valid) { - // Dump J, r into JtJ and Jtr - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_G[j] * w_G * J_G[k] + - J_I[j] * w_I * J_I[k]; - ++i; - } - A_reduction[21 + j] += - J_G[j] * w_G * r_G + J_I[j] * w_I * r_I; - } - A_reduction[27] += r_G * r_G + r_I * r_I; - A_reduction[28] += 1; + [&](const tbb::blocked_range& r, + std::vector A_reduction) { + for (int workload_idx = r.begin(); workload_idx < r.end(); + ++workload_idx) { + scalar_t J_G[6] = {0}, J_I[6] = {0}; + scalar_t r_G = 0, r_I = 0; + + bool valid = GetJacobianColoredICP( + workload_idx, source_points_ptr, source_colors_ptr, + target_points_ptr, target_normals_ptr, + target_colors_ptr, target_color_gradients_ptr, + correspondence_indices, sqrt_lambda_geometric, + sqrt_lambda_photometric, J_G, J_I, r_G, r_I); + + scalar_t w_G = GetWeightFromRobustKernel(r_G); + scalar_t w_I = GetWeightFromRobustKernel(r_I); + + if (valid) { + // Dump J, r into JtJ and Jtr + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_G[j] * w_G * J_G[k] + + J_I[j] * w_I * J_I[k]; + ++i; } + A_reduction[21 + j] += + J_G[j] * w_G * r_G + J_I[j] * w_I * r_I; } -#ifdef _WIN32 - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + A_reduction[27] += r_G * r_G + r_I * r_I; + A_reduction[28] += 1; + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 29; ++i) { global_sum[i] = A_1x29[i]; @@ -585,48 +571,40 @@ void ComputeInformationMatrixKernelCPU(const scalar_t *target_points_ptr, // As, AtA is a symmetric matrix, we only need 21 elements instead of 36. std::vector AtA(21, 0.0); -#ifdef _WIN32 std::vector zeros_21(21, 0.0); AtA = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_21, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - ++workload_idx) { -#else - scalar_t *A_reduction = AtA.data(); -#pragma omp parallel for reduction(+ : A_reduction[:21]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; workload_idx++) { -#endif - scalar_t J_x[6] = {0}, J_y[6] = {0}, J_z[6] = {0}; - - bool valid = GetInformationJacobians( - workload_idx, target_points_ptr, - correspondence_indices, J_x, J_y, J_z); - - if (valid) { - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_x[j] * J_x[k] + - J_y[j] * J_y[k] + - J_z[j] * J_z[k]; - ++i; - } - } + for (int workload_idx = r.begin(); workload_idx < r.end(); + ++workload_idx) { + scalar_t J_x[6] = {0}, J_y[6] = {0}, J_z[6] = {0}; + + bool valid = GetInformationJacobians( + workload_idx, target_points_ptr, + correspondence_indices, J_x, J_y, J_z); + + if (valid) { + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_x[j] * J_x[k] + + J_y[j] * J_y[k] + + J_z[j] * J_z[k]; + ++i; } } -#ifdef _WIN32 - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(21); - for (int j = 0; j < 21; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); -#endif + } + } + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(21); + for (int j = 0; j < 21; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 21; ++i) { global_sum[i] = AtA[i]; diff --git a/cpp/open3d/t/pipelines/registration/Feature.cpp b/cpp/open3d/t/pipelines/registration/Feature.cpp index d20e1a1b58c..56cd9a76e81 100644 --- a/cpp/open3d/t/pipelines/registration/Feature.cpp +++ b/cpp/open3d/t/pipelines/registration/Feature.cpp @@ -7,6 +7,8 @@ #include "open3d/t/pipelines/registration/Feature.h" +#include + #include "open3d/core/nns/NearestNeighborSearch.h" #include "open3d/t/geometry/PointCloud.h" #include "open3d/t/pipelines/kernel/Feature.h" @@ -100,20 +102,18 @@ core::Tensor CorrespondencesFromFeatures(const core::Tensor &source_features, std::array features{source_features, target_features}; std::vector corres(num_searches); - const int kMaxThreads = utility::EstimateMaxThreads(); - const int kOuterThreads = std::min(kMaxThreads, num_searches); - (void)kOuterThreads; // Avoids compiler warning if OpenMP is disabled - // corres[0]: corres_ij, corres[1]: corres_ji -#pragma omp parallel for num_threads(kOuterThreads) - for (int i = 0; i < num_searches; ++i) { - core::nns::NearestNeighborSearch nns(features[1 - i], - core::Dtype::Int64); - nns.KnnIndex(); - auto result = nns.KnnSearch(features[i], 1); - - corres[i] = result.first.View({-1}); - } + tbb::parallel_for(tbb::blocked_range(0, num_searches, 1), + [&](const tbb::blocked_range& range){ + for (int i = range.begin(); i < range.end(); ++i) { + core::nns::NearestNeighborSearch nns( + features[1 - i], core::Dtype::Int64); + nns.KnnIndex(); + auto result = nns.KnnSearch(features[i], 1); + + corres[i] = result.first.View({-1}); + } + }); auto corres_ij = corres[0]; core::Tensor arange_source = diff --git a/cpp/open3d/utility/Eigen.cpp b/cpp/open3d/utility/Eigen.cpp index b3f6fb34476..292834e0050 100644 --- a/cpp/open3d/utility/Eigen.cpp +++ b/cpp/open3d/utility/Eigen.cpp @@ -7,10 +7,15 @@ #include "open3d/utility/Eigen.h" +#include +#include +#include + #include #include #include "open3d/utility/Logging.h" +#include "open3d/utility/Parallel.h" namespace open3d { namespace utility { @@ -142,48 +147,112 @@ SolveJacobianSystemAndObtainExtrinsicMatrixArray(const Eigen::MatrixXd &JTJ, } } +// Shared state for both the single valued and vector valued versions +// This is used instead of lambdas in tbb::parallel reduce +// to prevent extra allocations of the shared state template -std::tuple ComputeJTJandJTr( - std::function f, - int iteration_num, - bool verbose /*=true*/) { - MatType JTJ; - VecType JTr; +struct JTJandJTrReduceBodyHelper { + // Local data + MatType JTJ = MatType::Zero(); + VecType JTr = VecType::Zero(); double r2_sum = 0.0; - JTJ.setZero(); - JTr.setZero(); -#pragma omp parallel - { - MatType JTJ_private; - VecType JTr_private; - double r2_sum_private = 0.0; - JTJ_private.setZero(); - JTr_private.setZero(); - VecType J_r; - J_r.setZero(); - double r = 0.0; - double w = 0.0; -#pragma omp for nowait - for (int i = 0; i < iteration_num; i++) { + + void join(const JTJandJTrReduceBodyHelper& rhs) { + JTJ += rhs.JTJ; + JTr += rhs.JTr; + r2_sum += rhs.r2_sum; + } + + std::tuple as_tuple() && { + return {std::move(JTJ), std::move(JTr), r2_sum}; + } +}; + +template +struct JTJandJTrReduceBody; + +// Code specific to the single valued reduction +template +struct JTJandJTrReduceBody: public + JTJandJTrReduceBodyHelper { + using FuncType = std::function; + // Global data + FuncType& f; + + // Local data + VecType J_r = VecType::Zero(); + double r = 0.0; + double w = 0.0; + + JTJandJTrReduceBody(FuncType& f_) + : JTJandJTrReduceBodyHelper(), f(f_) {} + + JTJandJTrReduceBody(JTJandJTrReduceBody& other, tbb::split sp) + : JTJandJTrReduceBodyHelper(), f(other.f) {} + + void operator()(const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { f(i, J_r, r, w); - JTJ_private.noalias() += J_r * w * J_r.transpose(); - JTr_private.noalias() += J_r * w * r; - r2_sum_private += r * r; - } -#pragma omp critical(ComputeJTJandJTr) - { - JTJ += JTJ_private; - JTr += JTr_private; - r2_sum += r2_sum_private; + this->JTJ.noalias() += J_r * w * J_r.transpose(); + this->JTr.noalias() += J_r * w * r; + this->r2_sum += r * r; } } + +}; + +template +std::tuple ComputeJTJandJTr( + std::function f, + int iteration_num, + bool verbose /*=true*/) { + JTJandJTrReduceBody reducer(f); + tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), reducer); + std::tuple result = std::move(reducer).as_tuple(); if (verbose) { LogDebug("Residual : {:.2e} (# of elements : {:d})", - r2_sum / (double)iteration_num, iteration_num); + std::get<2>(result) / (double)iteration_num, iteration_num); } - return std::make_tuple(std::move(JTJ), std::move(JTr), r2_sum); + return result; } +// Code specific to the vector valued reduction +template +struct JTJandJTrReduceBody: public + JTJandJTrReduceBodyHelper { + using FuncType = std::function>&, + std::vector&, std::vector&)>; + // Global data + FuncType& f; + + // Local data + std::vector> J_r; + std::vector r; + std::vector w; + + JTJandJTrReduceBody(FuncType& f_) + : JTJandJTrReduceBodyHelper(), + f(f_), J_r(), r(), w() {} + + JTJandJTrReduceBody(JTJandJTrReduceBody& other, tbb::split sp) + : JTJandJTrReduceBodyHelper(), + f(other.f), J_r(), r(), w() {} + + void operator()(const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + f(i, J_r, r, w); + for (int j = 0; j < (int)r.size(); j++) { + this->JTJ.noalias() += J_r[j] * w[j] * J_r[j].transpose(); + this->JTr.noalias() += J_r[j] * w[j] * r[j]; + this->r2_sum += r[j] * r[j]; + } + } + } + +}; + template std::tuple ComputeJTJandJTr( std::function< @@ -193,42 +262,15 @@ std::tuple ComputeJTJandJTr( std::vector &)> f, int iteration_num, bool verbose /*=true*/) { - MatType JTJ; - VecType JTr; - double r2_sum = 0.0; - JTJ.setZero(); - JTr.setZero(); -#pragma omp parallel - { - MatType JTJ_private; - VecType JTr_private; - double r2_sum_private = 0.0; - JTJ_private.setZero(); - JTr_private.setZero(); - std::vector r; - std::vector w; - std::vector> J_r; -#pragma omp for nowait - for (int i = 0; i < iteration_num; i++) { - f(i, J_r, r, w); - for (int j = 0; j < (int)r.size(); j++) { - JTJ_private.noalias() += J_r[j] * w[j] * J_r[j].transpose(); - JTr_private.noalias() += J_r[j] * w[j] * r[j]; - r2_sum_private += r[j] * r[j]; - } - } -#pragma omp critical(ComputeJTJandJTr) - { - JTJ += JTJ_private; - JTr += JTr_private; - r2_sum += r2_sum_private; - } - } + JTJandJTrReduceBody reducer(f); + tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), reducer); + std::tuple result = std::move(reducer).as_tuple(); if (verbose) { LogDebug("Residual : {:.2e} (# of elements : {:d})", - r2_sum / (double)iteration_num, iteration_num); + std::get<2>(result) / (double)iteration_num, iteration_num); } - return std::make_tuple(std::move(JTJ), std::move(JTr), r2_sum); + return result; } // clang-format off diff --git a/cpp/open3d/utility/Parallel.cpp b/cpp/open3d/utility/Parallel.cpp index b05f2375a95..5bb4ef733c3 100644 --- a/cpp/open3d/utility/Parallel.cpp +++ b/cpp/open3d/utility/Parallel.cpp @@ -45,13 +45,14 @@ int EstimateMaxThreads() { #endif } -bool InParallel() { - // TODO: when we add TBB/Parallel STL support to ParallelFor, update this. -#ifdef _OPENMP - return omp_in_parallel(); -#else - return false; -#endif +std::size_t& DefaultGrainSizeTBB() noexcept { + static std::size_t GrainSize = 256; + return GrainSize; +} + +std::size_t& DefaultGrainSizeTBB2D() noexcept { + static std::size_t GrainSize = 32; + return GrainSize; } } // namespace utility diff --git a/cpp/open3d/utility/Parallel.h b/cpp/open3d/utility/Parallel.h index 9763f533221..61452ac85c3 100644 --- a/cpp/open3d/utility/Parallel.h +++ b/cpp/open3d/utility/Parallel.h @@ -7,14 +7,22 @@ #pragma once +#include + namespace open3d { namespace utility { /// Estimate the maximum number of threads to be used in a parallel region. int EstimateMaxThreads(); -/// Returns true if in an parallel section. -bool InParallel(); +/// Returns a reference to the default grain size used by TBB. +/// Can be altered if needed. +std::size_t& DefaultGrainSizeTBB() noexcept; + +/// Returns a reference to the default grain size used by TBB +/// for 2d blocked parallel ranges +/// Can be altered if needed +std::size_t& DefaultGrainSizeTBB2D() noexcept; } // namespace utility } // namespace open3d diff --git a/cpp/open3d/utility/ProgressBar.cpp b/cpp/open3d/utility/ProgressBar.cpp index 5a5c6fce1ab..780affbaecb 100644 --- a/cpp/open3d/utility/ProgressBar.cpp +++ b/cpp/open3d/utility/ProgressBar.cpp @@ -13,6 +13,8 @@ #include #endif +#include + namespace open3d { namespace utility { @@ -86,5 +88,28 @@ ProgressBar &OMPProgressBar::operator++() { return *this; } +TBBProgressBar::TBBProgressBar(std::size_t expected_count, + const std::string &progress_info, + bool active) + : ProgressBar(expected_count, progress_info, active), + tbb::task_scheduler_observer(), num_threads(0) { + tbb::task_scheduler_observer::observe(true); +} + +void TBBProgressBar::on_scheduler_entry(bool is_worker) { + if (is_worker) { ++num_threads; } +} +void TBBProgressBar::on_scheduler_exit(bool is_worker) { + if (is_worker) { --num_threads; } +} + +ProgressBar &TBBProgressBar::operator++() { + // Ref: https://stackoverflow.com/a/44555438 + if (tbb::this_task_arena::current_thread_index() == 0) { + SetCurrentCount(current_count_ + num_threads); + } + return *this; +} + } // namespace utility } // namespace open3d diff --git a/cpp/open3d/utility/ProgressBar.h b/cpp/open3d/utility/ProgressBar.h index 710fa370f0e..813730e1156 100644 --- a/cpp/open3d/utility/ProgressBar.h +++ b/cpp/open3d/utility/ProgressBar.h @@ -9,6 +9,8 @@ #include +#include + namespace open3d { namespace utility { @@ -42,5 +44,17 @@ class OMPProgressBar : public ProgressBar { ProgressBar &operator++() override; }; +class TBBProgressBar : public ProgressBar, tbb::task_scheduler_observer { +public: + std::atomic num_threads; + TBBProgressBar(std::size_t expected_count, + const std::string &progress_info, + bool active = false); + ProgressBar &operator++() override; + + void on_scheduler_entry(bool is_worker) override; + void on_scheduler_exit(bool is_worker) override; +}; + } // namespace utility } // namespace open3d diff --git a/cpp/open3d/utility/Random.cpp b/cpp/open3d/utility/Random.cpp index 8d0c54a9697..f04a9c4e626 100644 --- a/cpp/open3d/utility/Random.cpp +++ b/cpp/open3d/utility/Random.cpp @@ -34,10 +34,10 @@ class RandomContext { /// This is used by other downstream random generators. /// You must also lock the GetMutex() before calling the engine. - std::mt19937* GetEngine() { return &engine_; } + std::mt19937& GetEngine() { return engine_; } /// Get global singleton mutex to protect the engine call. - std::mutex* GetMutex() { return &mutex_; } + tbb::spin_mutex& GetMutex() { return mutex_; } private: RandomContext() { @@ -47,18 +47,18 @@ class RandomContext { } int seed_; std::mt19937 engine_; - std::mutex mutex_; + tbb::spin_mutex mutex_; }; void Seed(const int seed) { RandomContext::GetInstance().Seed(seed); } -std::mt19937* GetEngine() { return RandomContext::GetInstance().GetEngine(); } +std::mt19937& GetEngine() { return RandomContext::GetInstance().GetEngine(); } -std::mutex* GetMutex() { return RandomContext::GetInstance().GetMutex(); } +tbb::spin_mutex& GetMutex() { return RandomContext::GetInstance().GetMutex(); } uint32_t RandUint32() { - std::lock_guard lock(*GetMutex()); - return (*GetEngine())(); + tbb::spin_mutex::scoped_lock lock(GetMutex()); + return GetEngine()(); } } // namespace random diff --git a/cpp/open3d/utility/Random.h b/cpp/open3d/utility/Random.h index 1a3411d29a2..6b38a2fd02d 100644 --- a/cpp/open3d/utility/Random.h +++ b/cpp/open3d/utility/Random.h @@ -7,7 +7,7 @@ #pragma once -#include +#include #include #include "open3d/utility/Logging.h" @@ -32,11 +32,11 @@ void Seed(const int seed); /// std::shuffle(vals.begin(), vals.end(), *utility::random::GetEngine()); /// } /// ``` -std::mt19937* GetEngine(); +std::mt19937& GetEngine(); /// Get global singleton mutex to protect the engine call. Also see /// random::GetEngine(). -std::mutex* GetMutex(); +tbb::spin_mutex& GetMutex(); /// Generate a random uint32. /// This function is globally seeded by utility::random::Seed(). @@ -80,8 +80,8 @@ class UniformIntGenerator { /// Call this to generate a uniformly distributed integer. T operator()() { - std::lock_guard lock(*GetMutex()); - return distribution_(*GetEngine()); + tbb::spin_mutex::scoped_lock lock(GetMutex()); + return distribution_(GetEngine()); } protected: @@ -122,8 +122,8 @@ class UniformRealGenerator { /// Call this to generate a uniformly distributed floating point value. T operator()() { - std::lock_guard lock(*GetMutex()); - return distribution_(*GetEngine()); + tbb::spin_mutex::scoped_lock lock(GetMutex()); + return distribution_(GetEngine()); } protected: @@ -163,8 +163,8 @@ class NormalGenerator { /// Call this to generate a normally distributed floating point value. T operator()() { - std::lock_guard lock(*GetMutex()); - return distribution_(*GetEngine()); + tbb::spin_mutex::scoped_lock lock(GetMutex()); + return distribution_(GetEngine()); } protected: diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3d58a53ad84..b2be6212066 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -40,6 +40,7 @@ target_link_libraries(tests PRIVATE Open3D::3rdparty_googletest Open3D::3rdparty_threads Open3D::3rdparty_vtk + Open3D::3rdparty_tbb ) if (TARGET Open3D::3rdparty_openmp) diff --git a/cpp/tests/utility/ProgressBar.cpp b/cpp/tests/utility/ProgressBar.cpp index 4641305813c..f94f9bd55d8 100644 --- a/cpp/tests/utility/ProgressBar.cpp +++ b/cpp/tests/utility/ProgressBar.cpp @@ -10,6 +10,8 @@ #include #include +#include + #include "open3d/utility/Parallel.h" #include "tests/Tests.h" @@ -41,5 +43,20 @@ TEST(ProgressBar, OMPProgressBar) { EXPECT_TRUE(static_cast(progress_bar.GetCurrentCount()) >= iterations); } +TEST(ProgressBar, TBBProgressBar) { + int iterations = 1000; + utility::TBBProgressBar progress_bar( + iterations, "TBBProgressBar test: ", true); + tbb::parallel_for(tbb::blocked_range( + 0, iterations, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + ++progress_bar; + } + }); + EXPECT_TRUE(static_cast(progress_bar.GetCurrentCount()) >= iterations); +} + } // namespace tests } // namespace open3d diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index f98562acdd5..b5d6cf188c3 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -62,11 +62,7 @@ open3d_add_example(SLACIntegrate) open3d_add_example(TIntegrateRGBD) open3d_add_example(TOdometryRGBD) open3d_add_example(TriangleMesh) -if (TARGET Open3D::3rdparty_openmp) - open3d_add_example(TrimMeshBasedOnPointCloud Open3D::3rdparty_openmp) -else () - open3d_add_example(TrimMeshBasedOnPointCloud) -endif() +open3d_add_example(TrimMeshBasedOnPointCloud Open3D::3rdparty_tbb) open3d_add_example(ViewDistances) open3d_add_example(ViewPCDMatch) open3d_add_example(Visualizer) @@ -93,11 +89,7 @@ if(BUILD_WEBRTC) endif() if (BUILD_LIBREALSENSE) - if (TARGET Open3D::3rdparty_openmp) - open3d_add_example(RealSenseBagReader Open3D::3rdparty_librealsense Open3D::3rdparty_jsoncpp Open3D::3rdparty_openmp) - else() - open3d_add_example(RealSenseBagReader Open3D::3rdparty_librealsense Open3D::3rdparty_jsoncpp) - endif() + open3d_add_example(RealSenseBagReader Open3D::3rdparty_librealsense Open3D::3rdparty_jsoncpp Open3D::3rdparty_tbb) open3d_add_example(RealSenseRecorder Open3D::3rdparty_librealsense) endif() diff --git a/examples/cpp/OpenMP.cpp b/examples/cpp/OpenMP.cpp index 0995a0bf49e..bcf707d3cb3 100644 --- a/examples/cpp/OpenMP.cpp +++ b/examples/cpp/OpenMP.cpp @@ -62,7 +62,7 @@ void TestMatrixMultiplication(int argc, char **argv) { } #ifdef _OPENMP - utility::LogInfo("OpenMP is supported."); + utility::LogWarning("OpenMP is support is deprecated."); #else utility::LogInfo("OpenMP is not supported."); #endif diff --git a/examples/cpp/RealSenseBagReader.cpp b/examples/cpp/RealSenseBagReader.cpp index 8fd81397022..8c58801b0da 100644 --- a/examples/cpp/RealSenseBagReader.cpp +++ b/examples/cpp/RealSenseBagReader.cpp @@ -14,6 +14,8 @@ #include #include +#include + #include "open3d/Open3D.h" using namespace open3d; @@ -203,23 +205,18 @@ int main(int argc, char *argv[]) { } ++idx; - if (write_image) -#pragma omp parallel sections - { -#pragma omp section - { + if (write_image) { + tbb::parallel_invoke([&](){ auto color_file = fmt::format("{0}/color/{1:05d}.jpg", output_path, idx); utility::LogInfo("Writing to {}", color_file); io::WriteImage(color_file, im_rgbd.color_); - } -#pragma omp section - { + }, [&](){ auto depth_file = fmt::format("{0}/depth/{1:05d}.png", output_path, idx); utility::LogInfo("Writing to {}", depth_file); io::WriteImage(depth_file, im_rgbd.depth_); - } + }); } vis.UpdateGeometry(); vis.UpdateRender(); diff --git a/examples/cpp/TrimMeshBasedOnPointCloud.cpp b/examples/cpp/TrimMeshBasedOnPointCloud.cpp index a5f82506f0a..ef45c51b801 100644 --- a/examples/cpp/TrimMeshBasedOnPointCloud.cpp +++ b/examples/cpp/TrimMeshBasedOnPointCloud.cpp @@ -7,6 +7,8 @@ #include "open3d/Open3D.h" +#include + void PrintHelp() { using namespace open3d; @@ -63,19 +65,21 @@ int main(int argc, char* argv[]) { geometry::KDTreeFlann kdtree; kdtree.SetGeometry(*pcd); std::vector remove_vertex_mask(mesh->vertices_.size(), false); - utility::ProgressBar progress_bar(mesh->vertices_.size(), - "Prune vetices: "); -#pragma omp parallel for schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int i = 0; i < (int)mesh->vertices_.size(); i++) { - std::vector indices(1); - std::vector dists(1); - int k = kdtree.SearchKNN(mesh->vertices_[i], 1, indices, dists); - if (k == 0 || dists[0] > distance * distance) { - remove_vertex_mask[i] = true; + utility::TBBProgressBar progress_bar(mesh->vertices_.size(), + "Prune vetices: "); + tbb::parallel_for(tbb::blocked_range( + 0, mesh->vertices_.size(), utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range){ + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(1); + std::vector dists(1); + int k = kdtree.SearchKNN(mesh->vertices_[i], 1, indices, dists); + if (k == 0 || dists[0] > distance * distance) { + remove_vertex_mask[i] = true; + } + ++progress_bar; } -#pragma omp critical - { ++progress_bar; } - } + }); std::vector index_old_to_new(mesh->vertices_.size()); bool has_vert_normal = mesh->HasVertexNormals(); From 0c367840ff96558940773b414ee528cff4af30c9 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 13:49:06 -0800 Subject: [PATCH 02/22] Swichted usage of std::mutex to tbb::mutex for consistency when used in conjunction with tbb parallel constructs. --- .../impl/continuous_conv/ContinuousConvBackpropFilter.h | 7 +++---- .../ContinuousConvTransposeBackpropFilter.h | 7 +++---- .../ml/impl/sparse_conv/SparseConvBackpropFilter.h | 9 +++++---- .../impl/sparse_conv/SparseConvTransposeBackpropFilter.h | 9 +++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/open3d/ml/impl/continuous_conv/ContinuousConvBackpropFilter.h b/cpp/open3d/ml/impl/continuous_conv/ContinuousConvBackpropFilter.h index b9b37e9410c..6fe3f60255f 100644 --- a/cpp/open3d/ml/impl/continuous_conv/ContinuousConvBackpropFilter.h +++ b/cpp/open3d/ml/impl/continuous_conv/ContinuousConvBackpropFilter.h @@ -7,10 +7,9 @@ #pragma once +#include #include -#include - #include "open3d/ml/impl/continuous_conv/CoordinateTransformation.h" namespace open3d { @@ -61,7 +60,7 @@ void _CConvBackropFilterCPU(TOut* filter_backprop, filter_dims[0]); memset(filter_backprop, 0, sizeof(TOut) * total_filter_size); - std::mutex filter_backprop_mutex; + tbb::mutex filter_backprop_mutex; tbb::parallel_for( tbb::blocked_range(0, num_out, 32), @@ -201,7 +200,7 @@ void _CConvBackropFilterCPU(TOut* filter_backprop, A = (C * B.transpose()).template cast(); { - std::lock_guard lock(filter_backprop_mutex); + tbb::mutex::scoped_lock lock(filter_backprop_mutex); int linear_i = 0; for (int j = 0; j < spatial_filter_size * in_channels; ++j) for (int i = 0; i < out_channels; ++i, ++linear_i) { diff --git a/cpp/open3d/ml/impl/continuous_conv/ContinuousConvTransposeBackpropFilter.h b/cpp/open3d/ml/impl/continuous_conv/ContinuousConvTransposeBackpropFilter.h index 251f59cdf18..56e569a3a36 100644 --- a/cpp/open3d/ml/impl/continuous_conv/ContinuousConvTransposeBackpropFilter.h +++ b/cpp/open3d/ml/impl/continuous_conv/ContinuousConvTransposeBackpropFilter.h @@ -7,10 +7,9 @@ #pragma once +#include #include -#include - #include "open3d/ml/impl/continuous_conv/CoordinateTransformation.h" namespace open3d { @@ -62,7 +61,7 @@ void _CConvTransposeBackpropFilterCPU(TOut* filter_backprop, memset(filter_backprop, 0, sizeof(TOut) * spatial_filter_size * in_channels * out_channels); - std::mutex filter_backprop_mutex; + tbb::mutex filter_backprop_mutex; tbb::parallel_for( tbb::blocked_range(0, num_out, 32), @@ -207,7 +206,7 @@ void _CConvTransposeBackpropFilterCPU(TOut* filter_backprop, A = (C * B.transpose()).template cast(); { - std::lock_guard lock(filter_backprop_mutex); + tbb::mutex::scoped_lock lock(filter_backprop_mutex); int linear_i = 0; for (int j = 0; j < spatial_filter_size * in_channels; ++j) for (int i = 0; i < out_channels; ++i, ++linear_i) { diff --git a/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h b/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h index 67a2712f556..68e31e43e9a 100644 --- a/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h +++ b/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h @@ -6,10 +6,11 @@ // ---------------------------------------------------------------------------- #pragma once -#include #include -#include + +#include +#include namespace open3d { namespace ml { @@ -45,7 +46,7 @@ void _SparseConvBackropFilterCPU(TOut* filter_backprop, num_kernel_elements * in_channels * out_channels; memset(filter_backprop, 0, sizeof(TOut) * total_filter_size); - std::mutex filter_backprop_mutex; + tbb::mutex filter_backprop_mutex; tbb::parallel_for( tbb::blocked_range(0, num_out, 10032), @@ -112,7 +113,7 @@ void _SparseConvBackropFilterCPU(TOut* filter_backprop, A = C * B.transpose(); { - std::lock_guard lock(filter_backprop_mutex); + tbb::mutex::scoped_lock lock(filter_backprop_mutex); int linear_i = 0; for (int j = 0; j < num_kernel_elements * in_channels; ++j) for (int i = 0; i < out_channels; ++i, ++linear_i) { diff --git a/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h b/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h index d6559844c0b..6266a178afb 100644 --- a/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h +++ b/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h @@ -6,10 +6,11 @@ // ---------------------------------------------------------------------------- #pragma once -#include #include -#include + +#include +#include namespace open3d { namespace ml { @@ -47,7 +48,7 @@ void _SparseConvTransposeBackpropFilterCPU( memset(filter_backprop, 0, sizeof(TOut) * num_kernel_elements * in_channels * out_channels); - std::mutex filter_backprop_mutex; + tbb::mutex filter_backprop_mutex; tbb::parallel_for( tbb::blocked_range(0, num_out, 32), @@ -130,7 +131,7 @@ void _SparseConvTransposeBackpropFilterCPU( A = (C * B.transpose()).template cast(); { - std::lock_guard lock(filter_backprop_mutex); + tbb::mutex::scoped_lock lock(filter_backprop_mutex); int linear_i = 0; for (int j = 0; j < num_kernel_elements * in_channels; ++j) for (int i = 0; i < out_channels; ++i, ++linear_i) { From 1514fc1bc2a77268beee82ca54bc895b0b768d40 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:27:51 -0800 Subject: [PATCH 03/22] Fixed bug in CPU reduction --- cpp/open3d/core/kernel/ReductionCPU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/core/kernel/ReductionCPU.cpp b/cpp/open3d/core/kernel/ReductionCPU.cpp index cf4c7f12e85..05fe7c4fab9 100644 --- a/cpp/open3d/core/kernel/ReductionCPU.cpp +++ b/cpp/open3d/core/kernel/ReductionCPU.cpp @@ -194,7 +194,7 @@ class CPUArgReductionEngine { // sub_indexer's workload_idx is indexer_'s ipo_idx. Indexer sub_indexer = indexer_.GetPerOutputIndexer(output_idx); using result_t = std::pair; - result_t val_idx{identity, identity}; + result_t val_idx{-1, identity}; val_idx = tbb::parallel_deterministic_reduce( tbb::blocked_range(0, sub_indexer.NumWorkloads(), utility::DefaultGrainSizeTBB()), val_idx, @@ -213,7 +213,7 @@ class CPUArgReductionEngine { std::get<0>(b), std::get<1>(b)); }); *reinterpret_cast(sub_indexer.GetOutputPtr(0)) = - std::get<1>(val_idx); + std::get<0>(val_idx); } }); } From 31f1e25a6feacb1d23f0a1379c5bc7805530c59b Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:29:08 -0800 Subject: [PATCH 04/22] Shift atomic to outside of RW mutex in PointCloudSegmentation.cpp --- cpp/open3d/geometry/PointCloudSegmentation.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/open3d/geometry/PointCloudSegmentation.cpp b/cpp/open3d/geometry/PointCloudSegmentation.cpp index e4da2a382b4..48ae5b22bd3 100644 --- a/cpp/open3d/geometry/PointCloudSegmentation.cpp +++ b/cpp/open3d/geometry/PointCloudSegmentation.cpp @@ -187,13 +187,12 @@ std::tuple> PointCloud::SegmentPlane( // Use size_t here to avoid large integer which exceed max of int. std::size_t break_iteration = std::numeric_limits::max(); - std::size_t iteration_count = 0; + std::atomic iteration_count = 0; tbb::spin_rw_mutex mtx; tbb::parallel_for(tbb::blocked_range(0, num_iterations, 1), [&](const tbb::blocked_range& range) { for (int i = range.begin(); i < range.end(); ++i) { - // Eschew using a mutex here because doing an extra iteration is ok if (iteration_count > break_iteration) { continue; } @@ -238,8 +237,8 @@ std::tuple> PointCloud::SegmentPlane( } } } - iteration_count++; } + iteration_count++; } }); From d7a82f16724749673901ae0906604300a4ce04b9 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:30:29 -0800 Subject: [PATCH 05/22] Switch from using a mutex to a concurrent vector for parallelization of self intersecting triangles. --- cpp/open3d/geometry/TriangleMesh.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/open3d/geometry/TriangleMesh.cpp b/cpp/open3d/geometry/TriangleMesh.cpp index f77c1965001..154c1bb6957 100644 --- a/cpp/open3d/geometry/TriangleMesh.cpp +++ b/cpp/open3d/geometry/TriangleMesh.cpp @@ -1358,8 +1358,7 @@ bool TriangleMesh::IsVertexManifold() const { std::vector TriangleMesh::GetSelfIntersectingTriangles() const { - std::vector self_intersecting_triangles; - tbb::spin_mutex out_mtx; + tbb::concurrent_vector self_intersecting_triangles; tbb::parallel_for(tbb::blocked_range( 0, triangles_.size(), utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range){ @@ -1396,14 +1395,14 @@ std::vector TriangleMesh::GetSelfIntersectingTriangles() q0.array().max(q1.array().max(q2.array())); if (IntersectionTest::AABBAABB(bb_min1, bb_max1, bb_min2, bb_max2) && IntersectionTest::TriangleTriangle3d(p0, p1, p2, q0, q1, q2)) { - tbb::spin_mutex::scoped_lock lock(out_mtx); self_intersecting_triangles.push_back( Eigen::Vector2i(tidx0, tidx1)); } } } }); - return self_intersecting_triangles; + return {self_intersecting_triangles.begin(), + self_intersecting_triangles.end()}; } bool TriangleMesh::IsSelfIntersecting() const { From 181a431ff3a5bbc300afca2f97ef8f23cff7aab8 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:35:16 -0800 Subject: [PATCH 06/22] Get maximum threads from TBB instead of OpenMP --- cpp/open3d/utility/Parallel.cpp | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/cpp/open3d/utility/Parallel.cpp b/cpp/open3d/utility/Parallel.cpp index 5bb4ef733c3..313f80a1756 100644 --- a/cpp/open3d/utility/Parallel.cpp +++ b/cpp/open3d/utility/Parallel.cpp @@ -7,42 +7,14 @@ #include "open3d/utility/Parallel.h" -#ifdef _OPENMP -#include -#endif +#include -#include -#include - -#include "open3d/utility/CPUInfo.h" -#include "open3d/utility/Logging.h" namespace open3d { namespace utility { -static std::string GetEnvVar(const std::string& name) { - if (const char* value = std::getenv(name.c_str())) { - return std::string(value); - } else { - return ""; - } -} - int EstimateMaxThreads() { -#ifdef _OPENMP - if (!GetEnvVar("OMP_NUM_THREADS").empty() || - !GetEnvVar("OMP_DYNAMIC").empty()) { - // See the full list of OpenMP environment variables at: - // https://www.openmp.org/spec-html/5.0/openmpch6.html - return omp_get_max_threads(); - } else { - // Returns the number of physical cores. - return utility::CPUInfo::GetInstance().NumCores(); - } -#else - (void)&GetEnvVar; // Avoids compiler warning. - return 1; -#endif + return tbb::this_task_arena::max_concurrency(); } std::size_t& DefaultGrainSizeTBB() noexcept { From 2312299c461760c4e3d792548d44bb0d0c96922a Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:39:11 -0800 Subject: [PATCH 07/22] Updates to ProgressBar.h/.cpp Including separating code for writing the progress bar into its own function and a bulk inplace add function operator+=. Also added TBBProgressBar. It does not inherit from ProgressBar as it uses an atomic for counting and has slightly different internals to use that atomicity. --- cpp/open3d/utility/ProgressBar.cpp | 112 ++++++++++++++++++++--------- cpp/open3d/utility/ProgressBar.h | 57 ++++++++++----- cpp/tests/utility/ProgressBar.cpp | 24 +++++-- 3 files changed, 136 insertions(+), 57 deletions(-) diff --git a/cpp/open3d/utility/ProgressBar.cpp b/cpp/open3d/utility/ProgressBar.cpp index 780affbaecb..7a1efae3a9e 100644 --- a/cpp/open3d/utility/ProgressBar.cpp +++ b/cpp/open3d/utility/ProgressBar.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include "open3d/utility/Logging.h" #include "open3d/utility/ProgressBar.h" #include @@ -18,30 +19,36 @@ namespace open3d { namespace utility { -ProgressBar::ProgressBar(size_t expected_count, - const std::string &progress_info, +ProgressBar::ProgressBar(std::size_t expected_count, + std::string progress_info, bool active) { - Reset(expected_count, progress_info, active); + Reset(expected_count, std::move(progress_info), active); } -void ProgressBar::Reset(size_t expected_count, - const std::string &progress_info, +void ProgressBar::Reset(std::size_t expected_count, + std::string progress_info, bool active) { expected_count_ = expected_count; - current_count_ = static_cast(-1); // Guaranteed to wraparound - progress_info_ = progress_info; + progress_info_ = std::move(progress_info); progress_pixel_ = 0; active_ = active; + // Essentially set current count to zero but + // goes through an overridden increment operator + current_count_ = std::numeric_limits::max(); operator++(); } -ProgressBar &ProgressBar::operator++() { +ProgressBar& ProgressBar::operator+=(std::size_t n) { SetCurrentCount(current_count_ + 1); return *this; } -void ProgressBar::SetCurrentCount(size_t n) { +void ProgressBar::SetCurrentCount(std::size_t n) { current_count_ = n; + UpdateBar(); +} + +void ProgressBar::UpdateBar() { if (!active_) { return; } @@ -49,7 +56,7 @@ void ProgressBar::SetCurrentCount(size_t n) { fmt::print("{}[{}] 100%\n", progress_info_, std::string(resolution_, '=')); } else { - size_t new_progress_pixel = + std::size_t new_progress_pixel = int(current_count_ * resolution_ / expected_count_); if (new_progress_pixel > progress_pixel_) { progress_pixel_ = new_progress_pixel; @@ -63,53 +70,92 @@ void ProgressBar::SetCurrentCount(size_t n) { } } -size_t ProgressBar::GetCurrentCount() const { return current_count_; } - -OMPProgressBar::OMPProgressBar(size_t expected_count, - const std::string &progress_info, +OMPProgressBar::OMPProgressBar(std::size_t expected_count, + std::string progress_info, bool active) - : ProgressBar(expected_count, progress_info, active) {} + : ProgressBar(expected_count, std::move(progress_info), active) {} -ProgressBar &OMPProgressBar::operator++() { +ProgressBar& OMPProgressBar::operator+=(std::size_t n) { // Ref: https://stackoverflow.com/a/44555438 #ifdef _OPENMP int number_of_threads = omp_get_num_threads(); int thread_id = omp_get_thread_num(); if (number_of_threads > 1) { if (thread_id == 0) { - SetCurrentCount(current_count_ + number_of_threads); + SetCurrentCount(current_count_ + number_of_threads * n); } } else { - SetCurrentCount(current_count_ + 1); + SetCurrentCount(current_count_ + n); } #else - SetCurrentCount(current_count_ + 1); + SetCurrentCount(current_count_ + n); #endif return *this; } TBBProgressBar::TBBProgressBar(std::size_t expected_count, - const std::string &progress_info, - bool active) - : ProgressBar(expected_count, progress_info, active), - tbb::task_scheduler_observer(), num_threads(0) { - tbb::task_scheduler_observer::observe(true); + std::string progress_info, + bool active) { + Reset(expected_count, std::move(progress_info), active); } -void TBBProgressBar::on_scheduler_entry(bool is_worker) { - if (is_worker) { ++num_threads; } +void TBBProgressBar::Reset(std::size_t expected_count, + std::string progress_info, + bool active) noexcept(false) { + if (expected_count & flag_bit_mask) { + utility::LogError("Expected count out of range [0, 2^31)"); + } + expected_count_ = expected_count; + progress_info_ = std::move(progress_info); + progress_pixel_ = 0; + active_ = active; + current_count_ = 0; + UpdateBar(); } -void TBBProgressBar::on_scheduler_exit(bool is_worker) { - if (is_worker) { --num_threads; } + +TBBProgressBar& TBBProgressBar::operator++() { + ++current_count_; + UpdateBar(); + return *this; } -ProgressBar &TBBProgressBar::operator++() { - // Ref: https://stackoverflow.com/a/44555438 - if (tbb::this_task_arena::current_thread_index() == 0) { - SetCurrentCount(current_count_ + num_threads); - } +TBBProgressBar& TBBProgressBar::operator+=(std::size_t n) { + current_count_ += n; + UpdateBar(); return *this; } +void TBBProgressBar::SetCurrentCount(std::size_t n) { + current_count_ = n; + UpdateBar(); +} + +void TBBProgressBar::UpdateBar() { + if (!active_ || current_count_ & flag_bit_mask) { + return; + } + // Check if the current count equals the expected count + // If so set the flag bit and print 100% + // tmp is created so that compare_exchange doesn't modify expected_count + if (std::size_t tmp = expected_count_; + current_count_.compare_exchange_strong( + tmp, expected_count_ | flag_bit_mask)) { + fmt::print("{}[{}] 100%\n", progress_info_, + std::string(resolution_, '=')); + } else if (tbb::this_task_arena::current_thread_index() != 0) { + std::size_t new_progress_pixel = current_count_ * + resolution_ / expected_count_; + if (new_progress_pixel > progress_pixel_) { + progress_pixel_ = new_progress_pixel; + int percent = int(current_count_ * 100 / expected_count_); + fmt::print("{}[{}>{}] {:d}%\r", progress_info_, + std::string(progress_pixel_, '='), + std::string(resolution_ - 1 - progress_pixel_, ' '), + percent); + fflush(stdout); + } + } +} + } // namespace utility } // namespace open3d diff --git a/cpp/open3d/utility/ProgressBar.h b/cpp/open3d/utility/ProgressBar.h index 813730e1156..cc68a4f2c6b 100644 --- a/cpp/open3d/utility/ProgressBar.h +++ b/cpp/open3d/utility/ProgressBar.h @@ -7,9 +7,10 @@ #pragma once -#include +#include -#include +#include +#include namespace open3d { namespace utility { @@ -17,43 +18,61 @@ namespace utility { class ProgressBar { public: ProgressBar(size_t expected_count, - const std::string &progress_info, + std::string progress_info, bool active = false); void Reset(size_t expected_count, - const std::string &progress_info, + std::string progress_info, bool active); - virtual ProgressBar &operator++(); + inline ProgressBar& operator++() { return *this += 1; }; + virtual ProgressBar& operator+=(std::size_t n); void SetCurrentCount(size_t n); - size_t GetCurrentCount() const; + void UpdateBar(); + inline std::size_t GetCurrentCount() const { return current_count_; } virtual ~ProgressBar() = default; protected: - const size_t resolution_ = 40; - size_t expected_count_; - size_t current_count_; + static constexpr size_t resolution_ = 40; + std::size_t expected_count_; + std::size_t current_count_; std::string progress_info_; - size_t progress_pixel_; + std::size_t progress_pixel_; bool active_; }; class OMPProgressBar : public ProgressBar { public: - OMPProgressBar(size_t expected_count, - const std::string &progress_info, + OMPProgressBar(std::size_t expected_count, + std::string progress_info, bool active = false); - ProgressBar &operator++() override; + ProgressBar& operator+=(std::size_t) override; }; -class TBBProgressBar : public ProgressBar, tbb::task_scheduler_observer { +class TBBProgressBar { public: - std::atomic num_threads; TBBProgressBar(std::size_t expected_count, - const std::string &progress_info, + std::string progress_info, bool active = false); - ProgressBar &operator++() override; + void Reset(std::size_t expected_count, + std::string progress_info, + bool active); + TBBProgressBar& operator++(); + TBBProgressBar& operator+=(std::size_t n); + void SetCurrentCount(std::size_t n); + void UpdateBar(); + inline std::size_t GetCurrentCount() const { + return current_count_ & ~flag_bit_mask; + } + - void on_scheduler_entry(bool is_worker) override; - void on_scheduler_exit(bool is_worker) override; +protected: + static constexpr std::size_t flag_bit_mask = ~(~std::size_t{} >> 1); + static constexpr std::size_t resolution_ = 40; + std::atomic current_count_; + tbb::collaborative_once_flag finalized; + std::size_t expected_count_; + std::string progress_info_; + mutable std::size_t progress_pixel_; + bool active_; }; } // namespace utility diff --git a/cpp/tests/utility/ProgressBar.cpp b/cpp/tests/utility/ProgressBar.cpp index f94f9bd55d8..da08565c1b9 100644 --- a/cpp/tests/utility/ProgressBar.cpp +++ b/cpp/tests/utility/ProgressBar.cpp @@ -34,28 +34,42 @@ TEST(ProgressBar, OMPProgressBar) { utility::OMPProgressBar progress_bar(iterations, "OMPProgressBar test: ", true); -#pragma omp parallel for schedule(static) \ +#pragma omp parallel for schedule(static) default(none) \ + shared(iterations, progress_bar) \ num_threads(utility::EstimateMaxThreads()) for (int i = 0; i < iterations; ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); ++progress_bar; } - EXPECT_TRUE(static_cast(progress_bar.GetCurrentCount()) >= iterations); + EXPECT_GE(static_cast(progress_bar.GetCurrentCount()), iterations); } TEST(ProgressBar, TBBProgressBar) { int iterations = 1000; utility::TBBProgressBar progress_bar( iterations, "TBBProgressBar test: ", true); - tbb::parallel_for(tbb::blocked_range( - 0, iterations, utility::DefaultGrainSizeTBB()), + tbb::parallel_for(tbb::blocked_range(0, iterations, 10), [&](const tbb::blocked_range& range) { for (int i = range.begin(); i < range.end(); ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); ++progress_bar; } }); - EXPECT_TRUE(static_cast(progress_bar.GetCurrentCount()) >= iterations); + EXPECT_EQ(static_cast(progress_bar.GetCurrentCount()), iterations); +} + +TEST(ProgressBar, TBBProgressBarBatched) { + int iterations = 1000; + utility::TBBProgressBar progress_bar( + iterations, "Batched TBBProgressBar test: ", true); + tbb::parallel_for(tbb::blocked_range(0, iterations, 10), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + progress_bar += (range.end() - range.begin()); + }); + EXPECT_EQ(static_cast(progress_bar.GetCurrentCount()), iterations); } } // namespace tests From bd258e34a24d0cdccdaa91ce306cd297ece6c712 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 16:40:01 -0800 Subject: [PATCH 08/22] Updated ClusterDBSCAN in PointCloudCluster.cpp to bulk update the progress bar to limit spinning on the mutex. --- cpp/open3d/geometry/PointCloudCluster.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cpp/open3d/geometry/PointCloudCluster.cpp b/cpp/open3d/geometry/PointCloudCluster.cpp index 5166b9ac244..acc42a89350 100644 --- a/cpp/open3d/geometry/PointCloudCluster.cpp +++ b/cpp/open3d/geometry/PointCloudCluster.cpp @@ -27,8 +27,8 @@ std::vector PointCloud::ClusterDBSCAN(double eps, // Precompute all neighbors. utility::LogDebug("Precompute neighbors."); - utility::ProgressBar progress_bar(points_.size(), "Precompute neighbors.", - print_progress); + utility::ProgressBar progress_bar(points_.size(), + "Precompute neighbors.", print_progress); std::vector> nbs(points_.size()); tbb::spin_mutex mtx; @@ -39,11 +39,9 @@ std::vector PointCloud::ClusterDBSCAN(double eps, for (std::size_t i = range.begin(); i < range.end(); ++i) { std::vector dists2; kdtree.SearchRadius(points_[i], eps, nbs[i], dists2); - { - tbb::spin_mutex::scoped_lock lock(mtx); - ++progress_bar; - } } + tbb::spin_mutex::scoped_lock lock(mtx); + progress_bar += (range.end() - range.begin()); }); utility::LogDebug("Done Precompute neighbors."); From 27898178ed417d33416112f2136160910855777b Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Fri, 26 Jan 2024 18:50:25 -0800 Subject: [PATCH 09/22] Applied Open3D style --- .../LegacyReconstructionUtil.h | 84 +++-- cpp/open3d/core/ParallelFor.h | 16 +- .../core/hashmap/CPU/CPUHashBackendBuffer.cpp | 17 +- cpp/open3d/core/hashmap/CPU/TBBHashBackend.h | 104 +++--- cpp/open3d/core/kernel/NonZeroCPU.cpp | 34 +- cpp/open3d/core/kernel/ReductionCPU.cpp | 123 ++++--- cpp/open3d/geometry/EstimateNormals.cpp | 101 +++--- cpp/open3d/geometry/ISSKeypoints.cpp | 93 ++--- cpp/open3d/geometry/Image.cpp | 247 ++++++++------ cpp/open3d/geometry/PointCloud.cpp | 199 ++++++----- cpp/open3d/geometry/PointCloudCluster.cpp | 29 +- .../PointCloudPlanarPatchDetection.cpp | 22 +- .../geometry/PointCloudSegmentation.cpp | 107 +++--- cpp/open3d/geometry/TriangleMesh.cpp | 186 +++++----- .../geometry/TriangleMeshDeformation.cpp | 155 +++++---- .../sensor/azure_kinect/AzureKinectSensor.cpp | 32 +- .../sparse_conv/SparseConvBackpropFilter.h | 4 +- .../SparseConvTransposeBackpropFilter.h | 4 +- .../pipelines/color_map/ColorMapUtils.cpp | 265 ++++++++------- .../pipelines/color_map/NonRigidOptimizer.cpp | 174 +++++----- .../pipelines/color_map/RigidOptimizer.cpp | 89 ++--- .../integration/UniformTSDFVolume.cpp | 321 ++++++++++-------- cpp/open3d/pipelines/odometry/Odometry.cpp | 85 ++--- .../registration/FastGlobalRegistration.cpp | 65 ++-- cpp/open3d/pipelines/registration/Feature.cpp | 181 +++++----- .../pipelines/registration/GeneralizedICP.cpp | 19 +- .../pipelines/registration/Registration.cpp | 194 ++++++----- .../pipelines/registration/Registration.h | 8 +- .../t/geometry/kernel/PointCloudCPU.cpp | 72 ++-- cpp/open3d/t/io/sensor/RGBDVideoReader.cpp | 32 +- .../pipelines/kernel/FillInLinearSystemImpl.h | 120 +++---- .../t/pipelines/kernel/RGBDOdometryCPU.cpp | 304 ++++++++--------- .../t/pipelines/kernel/RegistrationCPU.cpp | 208 ++++++------ .../t/pipelines/registration/Feature.cpp | 20 +- cpp/open3d/utility/Eigen.cpp | 60 ++-- cpp/open3d/utility/Logging.h | 8 +- cpp/open3d/utility/Parallel.cpp | 5 +- cpp/open3d/utility/ProgressBar.cpp | 15 +- cpp/open3d/utility/ProgressBar.h | 5 +- cpp/open3d/utility/Random.h | 1 + cpp/tests/utility/ProgressBar.cpp | 43 +-- examples/cpp/RealSenseBagReader.cpp | 27 +- examples/cpp/TrimMeshBasedOnPointCloud.cpp | 4 +- 43 files changed, 2070 insertions(+), 1812 deletions(-) diff --git a/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h b/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h index 4070e808e39..93a92c88930 100644 --- a/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h +++ b/cpp/apps/OfflineReconstruction/LegacyReconstructionUtil.h @@ -549,15 +549,17 @@ class ReconstructionPipeline { const size_t num_pairs = fragment_matching_results.size(); if (config_["multi_threading"].asBool()) { - tbb::parallel_for(tbb::blocked_range(0, num_pairs), - [&](const tbb::blocked_range& range){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - RegisterFragmentPair(ply_files, - fragment_matching_results[i].s_, - fragment_matching_results[i].t_, - fragment_matching_results[i]); - } - }); + tbb::parallel_for( + tbb::blocked_range(0, num_pairs), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); + ++i) { + RegisterFragmentPair( + ply_files, fragment_matching_results[i].s_, + fragment_matching_results[i].t_, + fragment_matching_results[i]); + } + }); } else { for (size_t i = 0; i < num_pairs; i++) { RegisterFragmentPair(ply_files, fragment_matching_results[i].s_, @@ -640,16 +642,18 @@ class ReconstructionPipeline { } if (config_["multi_threading"].asBool()) { - tbb::parallel_for(tbb::blocked_range(0, - fragment_matching_results.size()), - [&](const tbb::blocked_range& range){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - const int s = fragment_matching_results[i].s_; - const int t = fragment_matching_results[i].t_; - RefineFragmentPair(ply_files, s, t, - fragment_matching_results[i]); - } - }); + tbb::parallel_for( + tbb::blocked_range( + 0, fragment_matching_results.size()), + [&](const tbb::blocked_range& range) { + for (std::size_t i = range.begin(); i < range.end(); + ++i) { + const int s = fragment_matching_results[i].s_; + const int t = fragment_matching_results[i].t_; + RefineFragmentPair(ply_files, s, t, + fragment_matching_results[i]); + } + }); } else { for (size_t i = 0; i < fragment_matching_results.size(); i++) { const int s = fragment_matching_results[i].s_; @@ -748,23 +752,32 @@ class ReconstructionPipeline { const std::vector& depth_files_, const camera::PinholeCameraIntrinsic& intrinsic_, const PoseGraphT& pose_graph_) - : pipeline(pipeline_), fragment_id(fragment_id_), - color_files(color_files_), depth_files(depth_files_), - intrinsic(intrinsic_), pose_graph(pose_graph_) {} + : pipeline(pipeline_), + fragment_id(fragment_id_), + color_files(color_files_), + depth_files(depth_files_), + intrinsic(intrinsic_), + pose_graph(pose_graph_) {} PointCloudIntegrator(PointCloudIntegrator& o, tbb::split) - : pipeline(o.pipeline), fragment_id(o.fragment_id), - color_files(o.color_files), depth_files(o.depth_files), - intrinsic(o.intrinsic), pose_graph(o.pose_graph) {} + : pipeline(o.pipeline), + fragment_id(o.fragment_id), + color_files(o.color_files), + depth_files(o.depth_files), + intrinsic(o.intrinsic), + pose_graph(o.pose_graph) {} void operator()(const tbb::blocked_range& range) { for (std::size_t i = range.begin(); i < range.end(); ++i) { - const int i_abs = fragment_id * - pipeline.config_["n_frames_per_fragment"].asInt() + i; - utility::LogInfo("Fragment {:03d} / {:03d} :: " + const int i_abs = + fragment_id * pipeline.config_["n_frames_per_fragment"] + .asInt() + + i; + utility::LogInfo( + "Fragment {:03d} / {:03d} :: " "Integrate rgbd frame {:d} ({:d} of {:d}).", - fragment_id, pipeline.n_fragments_ - 1, i_abs, - i + 1, pose_graph.nodes_.size()); + fragment_id, pipeline.n_fragments_ - 1, i_abs, i + 1, + pose_graph.nodes_.size()); const geometry::RGBDImage rgbd = pipeline.ReadRGBDImage( color_files[i_abs], depth_files[i_abs], false); auto pcd = geometry::PointCloud::CreateFromRGBDImage( @@ -773,9 +786,7 @@ class ReconstructionPipeline { } } - void join(PointCloudIntegrator& rhs) { - fragment += rhs.fragment; - } + void join(PointCloudIntegrator& rhs) { fragment += rhs.fragment; } }; void IntegrateFragmentRGBD( @@ -784,16 +795,15 @@ class ReconstructionPipeline { const std::vector& depth_files, const camera::PinholeCameraIntrinsic& intrinsic, const pipelines::registration::PoseGraph& pose_graph) { - const size_t graph_num = pose_graph.nodes_.size(); PointCloudIntegrator reducer(*this, fragment_id, color_files, depth_files, intrinsic, pose_graph); - tbb::parallel_reduce(tbb::blocked_range( - 0, graph_num), reducer); + tbb::parallel_reduce(tbb::blocked_range(0, graph_num), + reducer); const geometry::PointCloud fragment_down = *reducer.fragment.VoxelDownSample( - config_["tsdf_cubic_size"].asDouble() / 512.0); + config_["tsdf_cubic_size"].asDouble() / 512.0); io::WritePointCloud( utility::filesystem::JoinPath( config_["path_dataset"].asString(), diff --git a/cpp/open3d/core/ParallelFor.h b/cpp/open3d/core/ParallelFor.h index ba8104fda89..1d09241fe8a 100644 --- a/cpp/open3d/core/ParallelFor.h +++ b/cpp/open3d/core/ParallelFor.h @@ -81,17 +81,13 @@ void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) { return; } -// #pragma omp parallel for num_threads(utility::EstimateMaxThreads()) -// for (int64_t i = 0; i < n; ++i) { -// func(i); -// } - tbb::parallel_for(tbb::blocked_range(0, n, 32), - [&func](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); ++i) { - func(i); - } - }); + [&func](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); + ++i) { + func(i); + } + }); } #endif diff --git a/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp b/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp index 86a23c7e285..9a8a693e95f 100644 --- a/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp +++ b/cpp/open3d/core/hashmap/CPU/CPUHashBackendBuffer.cpp @@ -5,11 +5,11 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include + #include "open3d/core/hashmap/HashBackendBuffer.h" #include "open3d/utility/Parallel.h" -#include - namespace open3d { namespace core { void CPUResetHeap(Tensor& heap) { @@ -17,12 +17,13 @@ void CPUResetHeap(Tensor& heap) { int64_t capacity = heap.GetLength(); tbb::parallel_for(tbb::blocked_range( - 0, capacity, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); ++i) { - heap_ptr[i] = i; - } - }); + 0, capacity, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); + ++i) { + heap_ptr[i] = i; + } + }); }; } // namespace core } // namespace open3d diff --git a/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h b/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h index b72ed98bf3d..cb76c786bb4 100644 --- a/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h +++ b/cpp/open3d/core/hashmap/CPU/TBBHashBackend.h @@ -94,17 +94,18 @@ void TBBHashBackend::Find(const void* input_keys, int64_t count) { const Key* input_keys_templated = static_cast(input_keys); - tbb::parallel_for(tbb::blocked_range(0, count, 64), - [=, &impl=impl_](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); ++i) { - const Key& key = input_keys_templated[i]; - - auto iter = impl->find(key); - bool flag = (iter != impl->end()); - output_masks[i] = flag; - output_buf_indices[i] = flag ? iter->second : 0; - } - }); + tbb::parallel_for( + tbb::blocked_range(0, count, 64), + [=, &impl = impl_](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); ++i) { + const Key& key = input_keys_templated[i]; + + auto iter = impl->find(key); + bool flag = (iter != impl->end()); + output_masks[i] = flag; + output_buf_indices[i] = flag ? iter->second : 0; + } + }); } template @@ -180,45 +181,50 @@ void TBBHashBackend::Insert( size_t n_values = input_values_soa.size(); - tbb::parallel_for(tbb::blocked_range(0, count, 64), - [&](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); ++i) { - output_buf_indices[i] = 0; - output_masks[i] = false; - - const Key& key = input_keys_templated[i]; - - // Try to insert a dummy buffer index. - auto res = impl_->insert({key, 0}); - - // Lazy copy key value pair to buffer only if succeeded - if (res.second) { - buf_index_t buf_index = buffer_accessor_->DeviceAllocate(); - void* key_ptr = buffer_accessor_->GetKeyPtr(buf_index); - - // Copy templated key to buffer - *static_cast(key_ptr) = key; - - // Copy/reset non-templated value in buffer - for (size_t j = 0; j < n_values; ++j) { - uint8_t* dst_value = static_cast( - buffer_accessor_->GetValuePtr(buf_index, j)); - - const uint8_t* src_value = - static_cast(input_values_soa[j]) + - this->value_dsizes_[j] * i; - std::memcpy(dst_value, src_value, this->value_dsizes_[j]); + tbb::parallel_for( + tbb::blocked_range(0, count, 64), + [&](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); ++i) { + output_buf_indices[i] = 0; + output_masks[i] = false; + + const Key& key = input_keys_templated[i]; + + // Try to insert a dummy buffer index. + auto res = impl_->insert({key, 0}); + + // Lazy copy key value pair to buffer only if succeeded + if (res.second) { + buf_index_t buf_index = + buffer_accessor_->DeviceAllocate(); + void* key_ptr = buffer_accessor_->GetKeyPtr(buf_index); + + // Copy templated key to buffer + *static_cast(key_ptr) = key; + + // Copy/reset non-templated value in buffer + for (size_t j = 0; j < n_values; ++j) { + uint8_t* dst_value = static_cast( + buffer_accessor_->GetValuePtr(buf_index, + j)); + + const uint8_t* src_value = + static_cast( + input_values_soa[j]) + + this->value_dsizes_[j] * i; + std::memcpy(dst_value, src_value, + this->value_dsizes_[j]); + } + + // Update from dummy 0 + res.first->second = buf_index; + + // Write to return variables + output_buf_indices[i] = buf_index; + output_masks[i] = true; + } } - - // Update from dummy 0 - res.first->second = buf_index; - - // Write to return variables - output_buf_indices[i] = buf_index; - output_masks[i] = true; - } - } - }); + }); } template diff --git a/cpp/open3d/core/kernel/NonZeroCPU.cpp b/cpp/open3d/core/kernel/NonZeroCPU.cpp index 67618aca5d0..68ffff32a60 100644 --- a/cpp/open3d/core/kernel/NonZeroCPU.cpp +++ b/cpp/open3d/core/kernel/NonZeroCPU.cpp @@ -5,6 +5,8 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include + #include #include "open3d/core/Indexer.h" @@ -12,8 +14,6 @@ #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" -#include - namespace open3d { namespace core { namespace kernel { @@ -48,19 +48,23 @@ Tensor NonZeroCPU(const Tensor& src) { std::vector> non_zero_indices_by_dimensions( num_dims, std::vector(num_non_zeros, 0)); - tbb::parallel_for(tbb::blocked_range( - 0, num_non_zeros, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); i++) { - int64_t non_zero_index = non_zero_indices[i]; - for (int64_t dim = num_dims - 1; dim >= 0; dim--) { - void* result_ptr = result_iter.GetPtr(dim * num_non_zeros + i); - OPEN3D_ASSERT(result_ptr != nullptr && "Internal error."); - *static_cast(result_ptr) = non_zero_index % shape[dim]; - non_zero_index = non_zero_index / shape[dim]; - } - } - }); + tbb::parallel_for( + tbb::blocked_range(0, num_non_zeros, + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); i++) { + int64_t non_zero_index = non_zero_indices[i]; + for (int64_t dim = num_dims - 1; dim >= 0; dim--) { + void* result_ptr = + result_iter.GetPtr(dim * num_non_zeros + i); + OPEN3D_ASSERT(result_ptr != nullptr && + "Internal error."); + *static_cast(result_ptr) = + non_zero_index % shape[dim]; + non_zero_index = non_zero_index / shape[dim]; + } + } + }); return result; } diff --git a/cpp/open3d/core/kernel/ReductionCPU.cpp b/cpp/open3d/core/kernel/ReductionCPU.cpp index 05fe7c4fab9..bdde3acfec6 100644 --- a/cpp/open3d/core/kernel/ReductionCPU.cpp +++ b/cpp/open3d/core/kernel/ReductionCPU.cpp @@ -5,6 +5,9 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include +#include + #include #include "open3d/core/Dispatch.h" @@ -14,9 +17,6 @@ #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" -#include -#include - namespace open3d { namespace core { namespace kernel { @@ -113,18 +113,20 @@ class CPUReductionEngine { "single-output reduction ops."); } scalar_t& dst = *reinterpret_cast(indexer.GetOutputPtr(0)); - dst = tbb::parallel_reduce(tbb::blocked_range( - 0, indexer.NumWorkloads(), utility::DefaultGrainSizeTBB()), - identity, [&](const tbb::blocked_range& range, - scalar_t so_far){ - for (int64_t workload_idx = range.begin(); - workload_idx < range.end(); ++workload_idx) { - scalar_t* src = reinterpret_cast( - indexer.GetInputPtr(0, workload_idx)); - so_far = element_kernel(*src, so_far); - } - return so_far; - }, element_kernel); + dst = tbb::parallel_reduce( + tbb::blocked_range(0, indexer.NumWorkloads(), + utility::DefaultGrainSizeTBB()), + identity, + [&](const tbb::blocked_range& range, scalar_t so_far) { + for (int64_t workload_idx = range.begin(); + workload_idx < range.end(); ++workload_idx) { + scalar_t* src = reinterpret_cast( + indexer.GetInputPtr(0, workload_idx)); + so_far = element_kernel(*src, so_far); + } + return so_far; + }, + element_kernel); } template @@ -155,16 +157,18 @@ class CPUReductionEngine { "LaunchReductionKernelTwoPass instead."); } - // TODO: could theoretically do inner reductions in parallel too with TBB - tbb::parallel_for(tbb::blocked_range( - 0, indexer_shape[best_dim], 1), - [&](const tbb::blocked_range& range){ - for (int64_t i = range.begin(); i < range.end(); ++i) { - Indexer sub_indexer(indexer); - sub_indexer.ShrinkDim(best_dim, i, 1); - LaunchReductionKernelSerial(sub_indexer, element_kernel); - } - }); + // TODO: could theoretically do inner reductions in parallel too with + // TBB + tbb::parallel_for( + tbb::blocked_range(0, indexer_shape[best_dim], 1), + [&](const tbb::blocked_range& range) { + for (int64_t i = range.begin(); i < range.end(); ++i) { + Indexer sub_indexer(indexer); + sub_indexer.ShrinkDim(best_dim, i, 1); + LaunchReductionKernelSerial(sub_indexer, + element_kernel); + } + }); } private: @@ -185,37 +189,48 @@ class CPUArgReductionEngine { // sub-iteration. int64_t num_output_elements = indexer_.NumOutputElements(); - tbb::parallel_for(tbb::blocked_range( - 0, num_output_elements, 1), - [&](const tbb::blocked_range& range){ - for (int64_t output_idx = range.begin(); - output_idx < range.end(); ++output_idx) { - // sub_indexer.NumWorkloads() == ipo. - // sub_indexer's workload_idx is indexer_'s ipo_idx. - Indexer sub_indexer = indexer_.GetPerOutputIndexer(output_idx); - using result_t = std::pair; - result_t val_idx{-1, identity}; - val_idx = tbb::parallel_deterministic_reduce( - tbb::blocked_range(0, sub_indexer.NumWorkloads(), - utility::DefaultGrainSizeTBB()), val_idx, - [&](const tbb::blocked_range& range, - result_t so_far){ - for (int64_t workload_idx = range.begin(); - workload_idx < range.end(); ++workload_idx) { - scalar_t& src_val = *reinterpret_cast( - sub_indexer.GetInputPtr(0, workload_idx)); - so_far = reduce_func(workload_idx, src_val, - std::get<0>(so_far), std::get<1>(so_far)); + tbb::parallel_for( + tbb::blocked_range(0, num_output_elements, 1), + [&](const tbb::blocked_range& range) { + for (int64_t output_idx = range.begin(); + output_idx < range.end(); ++output_idx) { + // sub_indexer.NumWorkloads() == ipo. + // sub_indexer's workload_idx is indexer_'s ipo_idx. + Indexer sub_indexer = + indexer_.GetPerOutputIndexer(output_idx); + using result_t = std::pair; + result_t val_idx{-1, identity}; + val_idx = tbb::parallel_deterministic_reduce( + tbb::blocked_range( + 0, sub_indexer.NumWorkloads(), + utility::DefaultGrainSizeTBB()), + val_idx, + [&](const tbb::blocked_range& range, + result_t so_far) { + for (int64_t workload_idx = range.begin(); + workload_idx < range.end(); + ++workload_idx) { + scalar_t& src_val = + *reinterpret_cast( + sub_indexer.GetInputPtr( + 0, + workload_idx)); + so_far = reduce_func( + workload_idx, src_val, + std::get<0>(so_far), + std::get<1>(so_far)); + } + return so_far; + }, + [&reduce_func](result_t a, result_t b) { + return reduce_func( + std::get<0>(a), std::get<1>(a), + std::get<0>(b), std::get<1>(b)); + }); + *reinterpret_cast(sub_indexer.GetOutputPtr( + 0)) = std::get<0>(val_idx); } - return so_far; - }, [&reduce_func](result_t a, result_t b) { - return reduce_func(std::get<0>(a), std::get<1>(a), - std::get<0>(b), std::get<1>(b)); }); - *reinterpret_cast(sub_indexer.GetOutputPtr(0)) = - std::get<0>(val_idx); - } - }); } private: diff --git a/cpp/open3d/geometry/EstimateNormals.cpp b/cpp/open3d/geometry/EstimateNormals.cpp index 3f1ddd6bda2..00b96ca8fb4 100644 --- a/cpp/open3d/geometry/EstimateNormals.cpp +++ b/cpp/open3d/geometry/EstimateNormals.cpp @@ -5,10 +5,11 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include + #include #include #include -#include #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" @@ -300,24 +301,26 @@ void PointCloud::EstimateNormals( covariances = covariances_; } - tbb::parallel_for(tbb::blocked_range( - 0, covariances.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - auto normal = ComputeNormal(covariances[i], fast_normal_computation); - if (normal.norm() == 0.0) { - if (has_normal) { - normal = normals_[i]; - } else { - normal = Eigen::Vector3d(0.0, 0.0, 1.0); + tbb::parallel_for( + tbb::blocked_range(0, covariances.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + auto normal = ComputeNormal(covariances[i], + fast_normal_computation); + if (normal.norm() == 0.0) { + if (has_normal) { + normal = normals_[i]; + } else { + normal = Eigen::Vector3d(0.0, 0.0, 1.0); + } + } + if (has_normal && normal.dot(normals_[i]) < 0.0) { + normal *= -1.0; + } + normals_[i] = normal; } - } - if (has_normal && normal.dot(normals_[i]) < 0.0) { - normal *= -1.0; - } - normals_[i] = normal; - } - }); + }); } void PointCloud::OrientNormalsToAlignWithDirection( @@ -327,18 +330,19 @@ void PointCloud::OrientNormalsToAlignWithDirection( utility::LogError( "No normals in the PointCloud. Call EstimateNormals() first."); } - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - auto &normal = normals_[i]; - if (normal.norm() == 0.0) { - normal = orientation_reference; - } else if (normal.dot(orientation_reference) < 0.0) { - normal *= -1.0; - } - } - }); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + auto &normal = normals_[i]; + if (normal.norm() == 0.0) { + normal = orientation_reference; + } else if (normal.dot(orientation_reference) < 0.0) { + normal *= -1.0; + } + } + }); } void PointCloud::OrientNormalsTowardsCameraLocation( @@ -348,25 +352,26 @@ void PointCloud::OrientNormalsTowardsCameraLocation( "No normals in the PointCloud. Call EstimateNormals() first."); } - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - Eigen::Vector3d orientation_reference = - camera_location - points_[i]; - auto &normal = normals_[i]; - if (normal.norm() == 0.0) { - normal = orientation_reference; - if (normal.norm() == 0.0) { - normal = Eigen::Vector3d(0.0, 0.0, 1.0); - } else { - normal.normalize(); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + Eigen::Vector3d orientation_reference = + camera_location - points_[i]; + auto &normal = normals_[i]; + if (normal.norm() == 0.0) { + normal = orientation_reference; + if (normal.norm() == 0.0) { + normal = Eigen::Vector3d(0.0, 0.0, 1.0); + } else { + normal.normalize(); + } + } else if (normal.dot(orientation_reference) < 0.0) { + normal *= -1.0; + } } - } else if (normal.dot(orientation_reference) < 0.0) { - normal *= -1.0; - } - } - }); + }); } void PointCloud::OrientNormalsConsistentTangentPlane( diff --git a/cpp/open3d/geometry/ISSKeypoints.cpp b/cpp/open3d/geometry/ISSKeypoints.cpp index db4f633fc1a..b8d16f736a7 100644 --- a/cpp/open3d/geometry/ISSKeypoints.cpp +++ b/cpp/open3d/geometry/ISSKeypoints.cpp @@ -9,6 +9,9 @@ // Copyright (c) 2020 Ignacio Vizzo, Cyrill Stachniss, University of Bonn. // ---------------------------------------------------------------------------- +#include +#include + #include #include #include @@ -18,9 +21,6 @@ #include #include -#include -#include - #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/Keypoint.h" #include "open3d/geometry/PointCloud.h" @@ -86,55 +86,56 @@ std::shared_ptr ComputeISSKeypoints( } std::vector third_eigen_values(points.size()); - tbb::parallel_for(tbb::blocked_range( - 0, points.size(), utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, points.size(), + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector indices; - std::vector dist; - int nb_neighbors = - kdtree.SearchRadius(points[i], - salient_radius, indices, dist); - if (nb_neighbors < min_neighbors) { - continue; - } - - Eigen::Matrix3d cov = utility::ComputeCovariance(points, indices); - if (cov.isZero()) { - continue; - } - - Eigen::SelfAdjointEigenSolver solver(cov); - const double& e1c = solver.eigenvalues()[2]; - const double& e2c = solver.eigenvalues()[1]; - const double& e3c = solver.eigenvalues()[0]; - - if ((e2c / e1c) < gamma_21 && e3c / e2c < gamma_32) { - third_eigen_values[i] = e3c; - } - } - }); + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector dist; + int nb_neighbors = kdtree.SearchRadius( + points[i], salient_radius, indices, dist); + if (nb_neighbors < min_neighbors) { + continue; + } + + Eigen::Matrix3d cov = + utility::ComputeCovariance(points, indices); + if (cov.isZero()) { + continue; + } + + Eigen::SelfAdjointEigenSolver solver(cov); + const double& e1c = solver.eigenvalues()[2]; + const double& e2c = solver.eigenvalues()[1]; + const double& e3c = solver.eigenvalues()[0]; + + if ((e2c / e1c) < gamma_21 && e3c / e2c < gamma_32) { + third_eigen_values[i] = e3c; + } + } + }); tbb::concurrent_vector kp_indices; kp_indices.reserve(points.size()); - tbb::parallel_for(tbb::blocked_range( - 0, points.size(), utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, points.size(), + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - if (third_eigen_values[i] > 0.0) { - std::vector nn_indices; - std::vector dist; - int nb_neighbors = kdtree.SearchRadius( - points[i], non_max_radius, nn_indices, dist); - - if (nb_neighbors >= min_neighbors && - IsLocalMaxima(i, nn_indices, third_eigen_values)) { - kp_indices.emplace_back(i); + for (std::size_t i = range.begin(); i < range.end(); ++i) { + if (third_eigen_values[i] > 0.0) { + std::vector nn_indices; + std::vector dist; + int nb_neighbors = kdtree.SearchRadius( + points[i], non_max_radius, nn_indices, dist); + + if (nb_neighbors >= min_neighbors && + IsLocalMaxima(i, nn_indices, third_eigen_values)) { + kp_indices.emplace_back(i); + } + } } - } - } - }); - + }); utility::LogDebug("[ComputeISSKeypoints] Extracted {} keypoints", kp_indices.size()); diff --git a/cpp/open3d/geometry/Image.cpp b/cpp/open3d/geometry/Image.cpp index f8d6b265fb5..8995d05d11a 100644 --- a/cpp/open3d/geometry/Image.cpp +++ b/cpp/open3d/geometry/Image.cpp @@ -138,21 +138,24 @@ std::shared_ptr Image::Downsample() const { // Integer division always rounds towards zero output->Prepare(width_ / 2, height_ / 2, 1, 4); - tbb::parallel_for(tbb::blocked_range2d( - 0, output->height_, utility::DefaultGrainSizeTBB(), - 0, output->width_, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range2d& range){ - for (int y = range.rows().begin(); y < range.rows().end(); ++y) { - for (int x = range.cols().begin(); x < range.cols().end(); ++x) { - float *p1 = PointerAt(x * 2, y * 2); - float *p2 = PointerAt(x * 2 + 1, y * 2); - float *p3 = PointerAt(x * 2, y * 2 + 1); - float *p4 = PointerAt(x * 2 + 1, y * 2 + 1); - float *p = output->PointerAt(x, y); - *p = (*p1 + *p2 + *p3 + *p4) / 4.0f; - } - } - }); + tbb::parallel_for( + tbb::blocked_range2d( + 0, output->height_, utility::DefaultGrainSizeTBB(), 0, + output->width_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d &range) { + for (int y = range.rows().begin(); y < range.rows().end(); + ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); + ++x) { + float *p1 = PointerAt(x * 2, y * 2); + float *p2 = PointerAt(x * 2 + 1, y * 2); + float *p3 = PointerAt(x * 2, y * 2 + 1); + float *p4 = PointerAt(x * 2 + 1, y * 2 + 1); + float *p = output->PointerAt(x, y); + *p = (*p1 + *p2 + *p3 + *p4) / 4.0f; + } + } + }); return output; } @@ -170,28 +173,33 @@ std::shared_ptr Image::FilterHorizontal( const int half_kernel_size = (int)(floor((double)kernel.size() / 2.0)); // Define the grain size based on the width of the memory read // Want no more than 1/4 of the read to be the halo cells - const std::size_t multiple = (utility::DefaultGrainSizeTBB() + - 4 * (kernel.size() - 1)) / utility::DefaultGrainSizeTBB(); - const std::size_t grain_size = multiple * utility::DefaultGrainSizeTBB() - + 1 - kernel.size(); - tbb::parallel_for(tbb::blocked_range2d( - 0, height_, grain_size, 0, width_, grain_size), - [&](const tbb::blocked_range2d& range){ - for (int y = range.rows().begin(); y < range.rows().end(); ++y) { - for (int x = range.cols().begin(); x < range.cols().end(); ++x) { - float *po = output->PointerAt(x, y, 0); - double temp = 0; - for (int i = -half_kernel_size; i <= half_kernel_size; ++i) { - int x_shift = x + i; - if (x_shift < 0) x_shift = 0; - if (x_shift > width_ - 1) x_shift = width_ - 1; - float *pi = PointerAt(x_shift, y, 0); - temp += (*pi * (float)kernel[i + half_kernel_size]); + const std::size_t multiple = + (utility::DefaultGrainSizeTBB() + 4 * (kernel.size() - 1)) / + utility::DefaultGrainSizeTBB(); + const std::size_t grain_size = + multiple * utility::DefaultGrainSizeTBB() + 1 - kernel.size(); + tbb::parallel_for( + tbb::blocked_range2d(0, height_, grain_size, 0, width_, + grain_size), + [&](const tbb::blocked_range2d &range) { + for (int y = range.rows().begin(); y < range.rows().end(); + ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); + ++x) { + float *po = output->PointerAt(x, y, 0); + double temp = 0; + for (int i = -half_kernel_size; i <= half_kernel_size; + ++i) { + int x_shift = x + i; + if (x_shift < 0) x_shift = 0; + if (x_shift > width_ - 1) x_shift = width_ - 1; + float *pi = PointerAt(x_shift, y, 0); + temp += (*pi * (float)kernel[i + half_kernel_size]); + } + *po = (float)temp; + } } - *po = (float)temp; - } - } - }); + }); return output; } @@ -257,20 +265,24 @@ std::shared_ptr Image::Transpose() const { int bytes_per_pixel = num_of_channels_ * bytes_per_channel_; tbb::parallel_for(tbb::blocked_range2d( - 0, height_, utility::DefaultGrainSizeTBB(), - 0, width_, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range2d& range){ - for (int y = range.rows().begin(); y < range.rows().end(); ++y) { - for (int x = range.cols().begin(); x < range.cols().end(); ++x) { - std::copy( - data_.data() + y * in_bytes_per_line + x * bytes_per_pixel, - data_.data() + y * in_bytes_per_line + - (x + 1) * bytes_per_pixel, - output->data_.data() + x * out_bytes_per_line + - y * bytes_per_pixel); - } - } - }); + 0, height_, utility::DefaultGrainSizeTBB(), 0, + width_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d &range) { + for (int y = range.rows().begin(); + y < range.rows().end(); ++y) { + for (int x = range.cols().begin(); + x < range.cols().end(); ++x) { + std::copy( + data_.data() + y * in_bytes_per_line + + x * bytes_per_pixel, + data_.data() + y * in_bytes_per_line + + (x + 1) * bytes_per_pixel, + output->data_.data() + + x * out_bytes_per_line + + y * bytes_per_pixel); + } + } + }); return output; } @@ -280,15 +292,16 @@ std::shared_ptr Image::FlipVertical() const { output->Prepare(width_, height_, num_of_channels_, bytes_per_channel_); int bytes_per_line = BytesPerLine(); - tbb::parallel_for(tbb::blocked_range( - 0, height_, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (int y = range.begin(); y < range.end(); ++y) { - std::copy(data_.data() + y * bytes_per_line, - data_.data() + (y + 1) * bytes_per_line, - output->data_.data() + (height_ - y - 1) * bytes_per_line); - } - }); + tbb::parallel_for( + tbb::blocked_range(0, height_, utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (int y = range.begin(); y < range.end(); ++y) { + std::copy(data_.data() + y * bytes_per_line, + data_.data() + (y + 1) * bytes_per_line, + output->data_.data() + + (height_ - y - 1) * bytes_per_line); + } + }); return output; } @@ -299,22 +312,24 @@ std::shared_ptr Image::FlipHorizontal() const { int bytes_per_line = BytesPerLine(); int bytes_per_pixel = num_of_channels_ * bytes_per_channel_; - tbb::parallel_for(tbb::blocked_range2d( - 0, height_, utility::DefaultGrainSizeTBB(), - 0, width_, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range2d& range){ - for (int y = range.rows().begin(); y < range.rows().end(); ++y) { - for (int x = range.cols().begin(); x < range.cols().end(); ++x) { - std::copy( - data_.data() + y * bytes_per_line + x * bytes_per_pixel, - data_.data() + y * bytes_per_line + - (x + 1) * bytes_per_pixel, - output->data_.data() + y * bytes_per_line + - (width_ - x - 1) * bytes_per_pixel); - } - } - }); - + tbb::parallel_for( + tbb::blocked_range2d( + 0, height_, utility::DefaultGrainSizeTBB(), 0, width_, + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d &range) { + for (int y = range.rows().begin(); y < range.rows().end(); + ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); + ++x) { + std::copy(data_.data() + y * bytes_per_line + + x * bytes_per_pixel, + data_.data() + y * bytes_per_line + + (x + 1) * bytes_per_pixel, + output->data_.data() + y * bytes_per_line + + (width_ - x - 1) * bytes_per_pixel); + } + } + }); return output; } @@ -326,28 +341,37 @@ std::shared_ptr Image::Dilate(int half_kernel_size /* = 1 */) const { } output->Prepare(width_, height_, 1, 1); - tbb::parallel_for(tbb::blocked_range2d( - 0, height_, utility::DefaultGrainSizeTBB() - (2 * half_kernel_size), - 0, width_, utility::DefaultGrainSizeTBB() - (2 * half_kernel_size)), - [&](const tbb::blocked_range2d& range){ - for (int y = range.rows().begin(); y < range.rows().end(); ++y) { - for (int x = range.cols().begin(); x < range.cols().end(); ++x) { - for (int yy = -half_kernel_size; yy <= half_kernel_size; ++yy) { - for (int xx = -half_kernel_size; xx <= half_kernel_size; ++xx) { - unsigned char *pi; - if (TestImageBoundary(x + xx, y + yy)) { - pi = PointerAt(x + xx, y + yy); - if (*pi == 255) { - *output->PointerAt(x, y, 0) = 255; - xx = half_kernel_size; - yy = half_kernel_size; + tbb::parallel_for( + tbb::blocked_range2d( + 0, height_, + utility::DefaultGrainSizeTBB() - (2 * half_kernel_size), 0, + width_, + utility::DefaultGrainSizeTBB() - (2 * half_kernel_size)), + [&](const tbb::blocked_range2d &range) { + for (int y = range.rows().begin(); y < range.rows().end(); + ++y) { + for (int x = range.cols().begin(); x < range.cols().end(); + ++x) { + for (int yy = -half_kernel_size; yy <= half_kernel_size; + ++yy) { + for (int xx = -half_kernel_size; + xx <= half_kernel_size; ++xx) { + unsigned char *pi; + if (TestImageBoundary(x + xx, y + yy)) { + pi = PointerAt(x + xx, + y + yy); + if (*pi == 255) { + *output->PointerAt( + x, y, 0) = 255; + xx = half_kernel_size; + yy = half_kernel_size; + } + } } } } } - } - } - }); + }); return output; } @@ -365,23 +389,28 @@ std::shared_ptr Image::CreateDepthBoundaryMask( auto mask = std::make_shared(); mask->Prepare(width, height, 1, 1); - tbb::parallel_for(tbb::blocked_range2d( - 0, height, utility::DefaultGrainSizeTBB(), - 0, width, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range2d& range){ - for (int v = range.rows().begin(); v < range.rows().end(); v++) { - for (int u = range.cols().begin(); u < range.cols().end(); u++) { - double dx = *depth_image_gradient_dx->PointerAt(u, v); - double dy = *depth_image_gradient_dy->PointerAt(u, v); - double mag = sqrt(dx * dx + dy * dy); - if (mag > depth_threshold_for_discontinuity_check) { - *mask->PointerAt(u, v) = 255; - } else { - *mask->PointerAt(u, v) = 0; + tbb::parallel_for( + tbb::blocked_range2d( + 0, height, utility::DefaultGrainSizeTBB(), 0, width, + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range2d &range) { + for (int v = range.rows().begin(); v < range.rows().end(); + v++) { + for (int u = range.cols().begin(); u < range.cols().end(); + u++) { + double dx = *depth_image_gradient_dx->PointerAt( + u, v); + double dy = *depth_image_gradient_dy->PointerAt( + u, v); + double mag = sqrt(dx * dx + dy * dy); + if (mag > depth_threshold_for_discontinuity_check) { + *mask->PointerAt(u, v) = 255; + } else { + *mask->PointerAt(u, v) = 0; + } + } } - } - } - }); + }); if (half_dilation_kernel_size_for_discontinuity_map >= 1) { auto mask_dilated = diff --git a/cpp/open3d/geometry/PointCloud.cpp b/cpp/open3d/geometry/PointCloud.cpp index 0f7f4826de0..fb28f3a4e5e 100644 --- a/cpp/open3d/geometry/PointCloud.cpp +++ b/cpp/open3d/geometry/PointCloud.cpp @@ -7,13 +7,13 @@ #include "open3d/geometry/PointCloud.h" +#include +#include + #include #include #include -#include -#include - #include "open3d/geometry/BoundingVolume.h" #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/Qhull.h" @@ -130,22 +130,24 @@ std::vector PointCloud::ComputePointCloudDistance( std::vector distances(points_.size()); KDTreeFlann kdtree; kdtree.SetGeometry(target); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector indices(1); - std::vector dists(1); - if (kdtree.SearchKNN(points_[i], 1, indices, dists) == 0) { - utility::LogDebug( - "[ComputePointCloudToPointCloudDistance] Found a point " - "without neighbors."); - distances[i] = 0.0; - } else { - distances[i] = std::sqrt(dists[0]); - } - } - }); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(1); + std::vector dists(1); + if (kdtree.SearchKNN(points_[i], 1, indices, dists) == 0) { + utility::LogDebug( + "[ComputePointCloudToPointCloudDistance] Found " + "a point " + "without neighbors."); + distances[i] = 0.0; + } else { + distances[i] = std::sqrt(dists[0]); + } + } + }); return distances; } @@ -582,18 +584,19 @@ PointCloud::RemoveRadiusOutliers(size_t nb_points, utility::TBBProgressBar progress_bar( points_.size(), "Remove radius outliers: ", print_progress); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector tmp_indices; - std::vector dist; - size_t nb_neighbors = kdtree.SearchRadius(points_[i], - search_radius, tmp_indices, dist); - mask[i] = (nb_neighbors > nb_points); - ++progress_bar; - } - }); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector tmp_indices; + std::vector dist; + size_t nb_neighbors = kdtree.SearchRadius( + points_[i], search_radius, tmp_indices, dist); + mask[i] = (nb_neighbors > nb_points); + ++progress_bar; + } + }); std::vector indices; for (size_t i = 0; i < mask.size(); i++) { @@ -627,26 +630,29 @@ PointCloud::RemoveStatisticalOutliers(size_t nb_neighbors, size_t valid_distances = tbb::parallel_reduce( tbb::blocked_range(0, points_.size(), - utility::DefaultGrainSizeTBB()), std::size_t{0}, - [&](const tbb::blocked_range& range, - std::size_t valid_so_far){ - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector tmp_indices; - std::vector dist; - kdtree.SearchKNN(points_[i], int(nb_neighbors), tmp_indices, dist); - double mean = -1.0; - if (dist.size() > 0u) { - ++valid_so_far; - std::for_each(dist.begin(), dist.end(), - [](double &d) { d = std::sqrt(d); }); - mean = std::accumulate(dist.begin(), dist.end(), - 0.0) / dist.size(); - } - avg_distances[i] = mean; - ++progress_bar; - } - return valid_so_far; - }, std::plus{}); + utility::DefaultGrainSizeTBB()), + std::size_t{0}, + [&](const tbb::blocked_range &range, + std::size_t valid_so_far) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector tmp_indices; + std::vector dist; + kdtree.SearchKNN(points_[i], int(nb_neighbors), tmp_indices, + dist); + double mean = -1.0; + if (dist.size() > 0u) { + ++valid_so_far; + std::for_each(dist.begin(), dist.end(), + [](double &d) { d = std::sqrt(d); }); + mean = std::accumulate(dist.begin(), dist.end(), 0.0) / + dist.size(); + } + avg_distances[i] = mean; + ++progress_bar; + } + return valid_so_far; + }, + std::plus{}); if (valid_distances == 0) { return std::make_tuple(std::make_shared(), @@ -663,8 +669,8 @@ PointCloud::RemoveStatisticalOutliers(size_t nb_neighbors, return x > 0 ? (x - cloud_mean) * (y - cloud_mean) : 0; }); // Bessel's correction - double std_dev = std::sqrt(sq_sum / - static_cast(valid_distances - 1)); + double std_dev = + std::sqrt(sq_sum / static_cast(valid_distances - 1)); double distance_threshold = cloud_mean + std_ratio * std_dev; for (size_t i = 0; i < avg_distances.size(); i++) { if (avg_distances[i] > 0 && avg_distances[i] < distance_threshold) { @@ -683,23 +689,28 @@ std::vector PointCloud::EstimatePerPointCovariances( KDTreeFlann kdtree; kdtree.SetGeometry(input); - tbb::parallel_for(tbb::blocked_range(0, points.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector indices; - std::vector distance2; - if (kdtree.Search(points[i], search_param, indices, distance2) >= 3) { - auto covariance = utility::ComputeCovariance(points, indices); - if (input.HasCovariances() && covariance.isIdentity(1e-4)) { - covariances[i] = input.covariances_[i]; - } else { - covariances[i] = covariance; + tbb::parallel_for( + tbb::blocked_range(0, points.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector distance2; + if (kdtree.Search(points[i], search_param, indices, + distance2) >= 3) { + auto covariance = + utility::ComputeCovariance(points, indices); + if (input.HasCovariances() && + covariance.isIdentity(1e-4)) { + covariances[i] = input.covariances_[i]; + } else { + covariances[i] = covariance; + } + } else { + covariances[i] = Eigen::Matrix3d::Identity(); + } } - } else { - covariances[i] = Eigen::Matrix3d::Identity(); - } - } - }); + }); return covariances; } void PointCloud::EstimateCovariances( @@ -724,14 +735,15 @@ std::vector PointCloud::ComputeMahalanobisDistance() const { Eigen::Matrix3d covariance; std::tie(mean, covariance) = ComputeMeanAndCovariance(); Eigen::Matrix3d cov_inv = covariance.inverse(); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - Eigen::Vector3d p = points_[i] - mean; - mahalanobis[i] = std::sqrt(p.transpose() * cov_inv * p); - } - }); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + Eigen::Vector3d p = points_[i] - mean; + mahalanobis[i] = std::sqrt(p.transpose() * cov_inv * p); + } + }); return mahalanobis; } @@ -742,22 +754,23 @@ std::vector PointCloud::ComputeNearestNeighborDistance() const { std::vector nn_dis(points_.size()); KDTreeFlann kdtree(*this); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector indices(2); - std::vector dists(2); - if (kdtree.SearchKNN(points_[i], 2, indices, dists) <= 1) { - utility::LogDebug( - "[ComputePointCloudNearestNeighborDistance] " - "Found a point without neighbors."); - nn_dis[i] = 0.0; - } else { - nn_dis[i] = std::sqrt(dists[1]); - } - } - }); + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices(2); + std::vector dists(2); + if (kdtree.SearchKNN(points_[i], 2, indices, dists) <= 1) { + utility::LogDebug( + "[ComputePointCloudNearestNeighborDistance] " + "Found a point without neighbors."); + nn_dis[i] = 0.0; + } else { + nn_dis[i] = std::sqrt(dists[1]); + } + } + }); return nn_dis; } diff --git a/cpp/open3d/geometry/PointCloudCluster.cpp b/cpp/open3d/geometry/PointCloudCluster.cpp index acc42a89350..334e54476b3 100644 --- a/cpp/open3d/geometry/PointCloudCluster.cpp +++ b/cpp/open3d/geometry/PointCloudCluster.cpp @@ -5,12 +5,12 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include +#include + #include #include -#include -#include - #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Logging.h" @@ -27,22 +27,23 @@ std::vector PointCloud::ClusterDBSCAN(double eps, // Precompute all neighbors. utility::LogDebug("Precompute neighbors."); - utility::ProgressBar progress_bar(points_.size(), - "Precompute neighbors.", print_progress); + utility::ProgressBar progress_bar(points_.size(), "Precompute neighbors.", + print_progress); std::vector> nbs(points_.size()); tbb::spin_mutex mtx; tbb::profiling::set_name(mtx, "ClusterDBSCAN"); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector dists2; - kdtree.SearchRadius(points_[i], eps, nbs[i], dists2); - } - tbb::spin_mutex::scoped_lock lock(mtx); - progress_bar += (range.end() - range.begin()); - }); + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector dists2; + kdtree.SearchRadius(points_[i], eps, nbs[i], dists2); + } + tbb::spin_mutex::scoped_lock lock(mtx); + progress_bar += (range.end() - range.begin()); + }); utility::LogDebug("Done Precompute neighbors."); diff --git a/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp b/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp index ff435589937..88c1b602dc2 100644 --- a/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp +++ b/cpp/open3d/geometry/PointCloudPlanarPatchDetection.cpp @@ -5,6 +5,8 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include + #include #include #include @@ -14,8 +16,6 @@ #include #include -#include - #include "libqhullcpp/PointCoordinates.h" #include "libqhullcpp/Qhull.h" #include "libqhullcpp/QhullVertex.h" @@ -997,15 +997,17 @@ PointCloud::DetectPlanarPatches( std::vector> neighbors; neighbors.resize(points_.size()); - tbb::parallel_for(tbb::blocked_range( - 0, points_.size(), utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, points_.size(), + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - std::vector indices; - std::vector distance2; - kdtree.Search(points_[i], search_param, neighbors[i], distance2); - } - }); + for (std::size_t i = range.begin(); i < range.end(); ++i) { + std::vector indices; + std::vector distance2; + kdtree.Search(points_[i], search_param, neighbors[i], + distance2); + } + }); const double normal_similarity_rad = normal_similarity_deg * M_PI / 180.0; const double coplanarity_rad = coplanarity_deg * M_PI / 180.0; diff --git a/cpp/open3d/geometry/PointCloudSegmentation.cpp b/cpp/open3d/geometry/PointCloudSegmentation.cpp index 48ae5b22bd3..74c63236dd5 100644 --- a/cpp/open3d/geometry/PointCloudSegmentation.cpp +++ b/cpp/open3d/geometry/PointCloudSegmentation.cpp @@ -5,6 +5,9 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include +#include + #include #include #include @@ -12,9 +15,6 @@ #include #include -#include -#include - #include "open3d/geometry/PointCloud.h" #include "open3d/geometry/TriangleMesh.h" #include "open3d/utility/Logging.h" @@ -63,9 +63,9 @@ class RANSACResult { RANSACResult() : fitness_(0), inlier_rmse_(0) {} ~RANSACResult() {} - bool IsBetterRANSACThan(const RANSACResult& other) { - return fitness_ > other.fitness_ || - (fitness_ == other.fitness_ && inlier_rmse_ < other.inlier_rmse_); + bool IsBetterRANSACThan(const RANSACResult &other) { + return fitness_ > other.fitness_ || (fitness_ == other.fitness_ && + inlier_rmse_ < other.inlier_rmse_); } public: @@ -190,57 +190,62 @@ std::tuple> PointCloud::SegmentPlane( std::atomic iteration_count = 0; tbb::spin_rw_mutex mtx; - tbb::parallel_for(tbb::blocked_range(0, num_iterations, 1), - [&](const tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - if (iteration_count > break_iteration) { - continue; - } + tbb::parallel_for( + tbb::blocked_range(0, num_iterations, 1), + [&](const tbb::blocked_range &range) { + for (int i = range.begin(); i < range.end(); ++i) { + if (iteration_count > break_iteration) { + continue; + } - const std::vector sampled_indices = sampler(ransac_n); - std::vector inliers = sampled_indices; - - // Fit model to num_model_parameters randomly selected points - // among the inliers. - Eigen::Vector4d plane_model; - if (ransac_n == 3) { - plane_model = TriangleMesh::ComputeTrianglePlane( - points_[inliers[0]], points_[inliers[1]], - points_[inliers[2]]); - } else { - plane_model = GetPlaneFromPoints(points_, inliers); - } + const std::vector sampled_indices = + sampler(ransac_n); + std::vector inliers = sampled_indices; + + // Fit model to num_model_parameters randomly selected + // points among the inliers. + Eigen::Vector4d plane_model; + if (ransac_n == 3) { + plane_model = TriangleMesh::ComputeTrianglePlane( + points_[inliers[0]], points_[inliers[1]], + points_[inliers[2]]); + } else { + plane_model = GetPlaneFromPoints(points_, inliers); + } - if (plane_model.isZero(0)) { - continue; - } + if (plane_model.isZero(0)) { + continue; + } - inliers.clear(); - auto this_result = EvaluateRANSACBasedOnDistance( - points_, plane_model, inliers, distance_threshold); - { - tbb::spin_rw_mutex::scoped_lock lock(mtx); - if (this_result.IsBetterRANSACThan(result)) { - lock.upgrade_to_writer(); - // Have to recheck after upgrading lock because - // upgrading requires releasing the lock - if (this_result.IsBetterRANSACThan(result)) { - result = this_result; - best_plane_model = plane_model; - if (result.fitness_ < 1.0) { - break_iteration = std::min(log(1 - probability) / - log(1 - pow(result.fitness_, ransac_n)), - (double)num_iterations); - } else { - // Set break_iteration to 0 so we break the loop. - break_iteration = 0; + inliers.clear(); + auto this_result = EvaluateRANSACBasedOnDistance( + points_, plane_model, inliers, distance_threshold); + { + tbb::spin_rw_mutex::scoped_lock lock(mtx); + if (this_result.IsBetterRANSACThan(result)) { + lock.upgrade_to_writer(); + // Have to recheck after upgrading lock because + // upgrading requires releasing the lock + if (this_result.IsBetterRANSACThan(result)) { + result = this_result; + best_plane_model = plane_model; + if (result.fitness_ < 1.0) { + break_iteration = std::min( + log(1 - probability) / + log(1 - pow(result.fitness_, + ransac_n)), + (double)num_iterations); + } else { + // Set break_iteration to 0 so we break the + // loop. + break_iteration = 0; + } + } } } + iteration_count++; } - } - iteration_count++; - } - }); + }); // Find the final inliers using best_plane_model. std::vector final_inliers; diff --git a/cpp/open3d/geometry/TriangleMesh.cpp b/cpp/open3d/geometry/TriangleMesh.cpp index 154c1bb6957..96b87028695 100644 --- a/cpp/open3d/geometry/TriangleMesh.cpp +++ b/cpp/open3d/geometry/TriangleMesh.cpp @@ -7,11 +7,14 @@ #include "open3d/geometry/TriangleMesh.h" +#include +#include +#include + #include #include #include #include -#include #include "open3d/geometry/BoundingVolume.h" #include "open3d/geometry/IntersectionTest.h" @@ -929,14 +932,16 @@ TriangleMesh &TriangleMesh::MergeCloseVertices(double eps) { utility::LogDebug("Precompute Neighbours"); std::vector> nbs(vertices_.size()); utility::LogDebug("Done Precompute Neighbours"); - tbb::parallel_for(tbb::blocked_range( - 0, vertices_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - std::vector dists2; - for (std::size_t idx = range.begin(); idx < range.end(); ++idx) { - kdtree.SearchRadius(vertices_[idx], eps, nbs[idx], dists2); - } - }); + tbb::parallel_for( + tbb::blocked_range(0, vertices_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + std::vector dists2; + for (std::size_t idx = range.begin(); idx < range.end(); + ++idx) { + kdtree.SearchRadius(vertices_[idx], eps, nbs[idx], dists2); + } + }); bool has_vertex_normals = HasVertexNormals(); bool has_vertex_colors = HasVertexColors(); @@ -1212,22 +1217,26 @@ double TriangleMesh::GetVolume() const { "computed."); } - return std::abs(tbb::parallel_reduce(tbb::blocked_range( - 0, triangles_.size(), utility::DefaultGrainSizeTBB()), 0.0, - [&](const tbb::blocked_range& range, double volume){ - for (std::size_t tidx = range.begin(); tidx < range.end(); ++tidx) { - // Computes the signed volume of the tetrahedron defined by - // the three triangle vertices and the origin. The sign is determined by - // checking if the origin is at the same side as the normal with respect to - // the triangle. - const Eigen::Vector3i &triangle = triangles_[tidx]; - const Eigen::Vector3d &vertex0 = vertices_[triangle(0)]; - const Eigen::Vector3d &vertex1 = vertices_[triangle(1)]; - const Eigen::Vector3d &vertex2 = vertices_[triangle(2)]; - volume += vertex0.dot(vertex1.cross(vertex2)) / 6.0; - } - return volume; - }, std::plus{})); + return std::abs(tbb::parallel_reduce( + tbb::blocked_range(0, triangles_.size(), + utility::DefaultGrainSizeTBB()), + 0.0, + [&](const tbb::blocked_range &range, double volume) { + for (std::size_t tidx = range.begin(); tidx < range.end(); + ++tidx) { + // Computes the signed volume of the tetrahedron defined by + // the three triangle vertices and the origin. The sign is + // determined by checking if the origin is at the same side + // as the normal with respect to the triangle. + const Eigen::Vector3i &triangle = triangles_[tidx]; + const Eigen::Vector3d &vertex0 = vertices_[triangle(0)]; + const Eigen::Vector3d &vertex1 = vertices_[triangle(1)]; + const Eigen::Vector3d &vertex2 = vertices_[triangle(2)]; + volume += vertex0.dot(vertex1.cross(vertex2)) / 6.0; + } + return volume; + }, + std::plus{})); } Eigen::Vector4d TriangleMesh::ComputeTrianglePlane(const Eigen::Vector3d &p0, @@ -1359,48 +1368,53 @@ bool TriangleMesh::IsVertexManifold() const { std::vector TriangleMesh::GetSelfIntersectingTriangles() const { tbb::concurrent_vector self_intersecting_triangles; - tbb::parallel_for(tbb::blocked_range( - 0, triangles_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (std::size_t tidx0 = range.begin(); tidx0 < range.end(); ++tidx0) { - const Eigen::Vector3i &tria_p = triangles_[tidx0]; - const Eigen::Vector3d &p0 = vertices_[tria_p(0)]; - const Eigen::Vector3d &p1 = vertices_[tria_p(1)]; - const Eigen::Vector3d &p2 = vertices_[tria_p(2)]; - - const Eigen::Vector3d bb_min1 = - p0.array().min(p1.array().min(p2.array())); - const Eigen::Vector3d bb_max1 = - p0.array().max(p1.array().max(p2.array())); - - for (size_t tidx1 = tidx0 + 1; tidx1 < triangles_.size(); ++tidx1) { - const Eigen::Vector3i &tria_q = triangles_[tidx1]; - // check if neighbour triangle - if (tria_p(0) == tria_q(0) || tria_p(0) == tria_q(1) || - tria_p(0) == tria_q(2) || tria_p(1) == tria_q(0) || - tria_p(1) == tria_q(1) || tria_p(1) == tria_q(2) || - tria_p(2) == tria_q(0) || tria_p(2) == tria_q(1) || - tria_p(2) == tria_q(2)) { - continue; - } - - // check for intersection - const Eigen::Vector3d &q0 = vertices_[tria_q(0)]; - const Eigen::Vector3d &q1 = vertices_[tria_q(1)]; - const Eigen::Vector3d &q2 = vertices_[tria_q(2)]; - - const Eigen::Vector3d bb_min2 = - q0.array().min(q1.array().min(q2.array())); - const Eigen::Vector3d bb_max2 = - q0.array().max(q1.array().max(q2.array())); - if (IntersectionTest::AABBAABB(bb_min1, bb_max1, bb_min2, bb_max2) && - IntersectionTest::TriangleTriangle3d(p0, p1, p2, q0, q1, q2)) { - self_intersecting_triangles.push_back( - Eigen::Vector2i(tidx0, tidx1)); + tbb::parallel_for( + tbb::blocked_range(0, triangles_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t tidx0 = range.begin(); tidx0 < range.end(); + ++tidx0) { + const Eigen::Vector3i &tria_p = triangles_[tidx0]; + const Eigen::Vector3d &p0 = vertices_[tria_p(0)]; + const Eigen::Vector3d &p1 = vertices_[tria_p(1)]; + const Eigen::Vector3d &p2 = vertices_[tria_p(2)]; + + const Eigen::Vector3d bb_min1 = + p0.array().min(p1.array().min(p2.array())); + const Eigen::Vector3d bb_max1 = + p0.array().max(p1.array().max(p2.array())); + + for (size_t tidx1 = tidx0 + 1; tidx1 < triangles_.size(); + ++tidx1) { + const Eigen::Vector3i &tria_q = triangles_[tidx1]; + // check if neighbour triangle + if (tria_p(0) == tria_q(0) || tria_p(0) == tria_q(1) || + tria_p(0) == tria_q(2) || tria_p(1) == tria_q(0) || + tria_p(1) == tria_q(1) || tria_p(1) == tria_q(2) || + tria_p(2) == tria_q(0) || tria_p(2) == tria_q(1) || + tria_p(2) == tria_q(2)) { + continue; + } + + // check for intersection + const Eigen::Vector3d &q0 = vertices_[tria_q(0)]; + const Eigen::Vector3d &q1 = vertices_[tria_q(1)]; + const Eigen::Vector3d &q2 = vertices_[tria_q(2)]; + + const Eigen::Vector3d bb_min2 = + q0.array().min(q1.array().min(q2.array())); + const Eigen::Vector3d bb_max2 = + q0.array().max(q1.array().max(q2.array())); + if (IntersectionTest::AABBAABB(bb_min1, bb_max1, + bb_min2, bb_max2) && + IntersectionTest::TriangleTriangle3d(p0, p1, p2, q0, + q1, q2)) { + self_intersecting_triangles.push_back( + Eigen::Vector2i(tidx0, tidx1)); + } + } } - } - } - }); + }); return {self_intersecting_triangles.begin(), self_intersecting_triangles.end()}; } @@ -1445,25 +1459,27 @@ TriangleMesh::ClusterConnectedTriangles() const { utility::LogDebug("[ClusterConnectedTriangles] Compute triangle adjacency"); auto edges_to_triangles = GetEdgeToTrianglesMap(); std::vector> adjacency_list(triangles_.size()); - tbb::parallel_for(tbb::blocked_range( - 0, triangles_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (std::size_t tidx = range.begin(); tidx < range.end(); ++tidx) { - const auto &triangle = triangles_[tidx]; - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(0), triangle(1))]) { - adjacency_list[tidx].insert(tnb); - } - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(0), triangle(2))]) { - adjacency_list[tidx].insert(tnb); - } - for (auto tnb : - edges_to_triangles[GetOrderedEdge(triangle(1), triangle(2))]) { - adjacency_list[tidx].insert(tnb); - } - } - }); + tbb::parallel_for( + tbb::blocked_range(0, triangles_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t tidx = range.begin(); tidx < range.end(); + ++tidx) { + const auto &triangle = triangles_[tidx]; + for (auto tnb : edges_to_triangles[GetOrderedEdge( + triangle(0), triangle(1))]) { + adjacency_list[tidx].insert(tnb); + } + for (auto tnb : edges_to_triangles[GetOrderedEdge( + triangle(0), triangle(2))]) { + adjacency_list[tidx].insert(tnb); + } + for (auto tnb : edges_to_triangles[GetOrderedEdge( + triangle(1), triangle(2))]) { + adjacency_list[tidx].insert(tnb); + } + } + }); utility::LogDebug( "[ClusterConnectedTriangles] Done computing triangle adjacency"); diff --git a/cpp/open3d/geometry/TriangleMeshDeformation.cpp b/cpp/open3d/geometry/TriangleMeshDeformation.cpp index 4541c0fd7c4..460fc2614a3 100644 --- a/cpp/open3d/geometry/TriangleMeshDeformation.cpp +++ b/cpp/open3d/geometry/TriangleMeshDeformation.cpp @@ -5,12 +5,12 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- +#include + #include #include #include -#include - #include "open3d/geometry/TriangleMesh.h" #include "open3d/utility/Logging.h" #include "open3d/utility/Parallel.h" @@ -19,8 +19,8 @@ namespace open3d { namespace geometry { std::shared_ptr TriangleMesh::DeformAsRigidAsPossible( - const std::vector &constraint_vertex_indices, - const std::vector &constraint_vertex_positions, + const std::vector& constraint_vertex_indices, + const std::vector& constraint_vertex_positions, size_t max_iter, DeformAsRigidAsPossibleEnergy energy_model, double smoothed_alpha) const { @@ -95,77 +95,86 @@ std::shared_ptr TriangleMesh::DeformAsRigidAsPossible( std::swap(Rs, Rs_old); } - tbb::parallel_for(tbb::blocked_range( - 0, vertices_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (int i = range.begin(); i < range.end(); ++i) { - // Update rotations - Eigen::Matrix3d S = Eigen::Matrix3d::Zero(); - Eigen::Matrix3d R = Eigen::Matrix3d::Zero(); - int n_nbs = 0; - for (int j : prime->adjacency_list_[i]) { - Eigen::Vector3d e0 = vertices_[i] - vertices_[j]; - Eigen::Vector3d e1 = prime->vertices_[i] - prime->vertices_[j]; - double w = edge_weights[GetOrderedEdge(i, j)]; - S += w * (e0 * e1.transpose()); - if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed) { - R += Rs_old[j]; + tbb::parallel_for( + tbb::blocked_range(0, vertices_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + // Update rotations + Eigen::Matrix3d S = Eigen::Matrix3d::Zero(); + Eigen::Matrix3d R = Eigen::Matrix3d::Zero(); + int n_nbs = 0; + for (int j : prime->adjacency_list_[i]) { + Eigen::Vector3d e0 = vertices_[i] - vertices_[j]; + Eigen::Vector3d e1 = + prime->vertices_[i] - prime->vertices_[j]; + double w = edge_weights[GetOrderedEdge(i, j)]; + S += w * (e0 * e1.transpose()); + if (energy_model == + DeformAsRigidAsPossibleEnergy::Smoothed) { + R += Rs_old[j]; + } + n_nbs++; + } + if (energy_model == + DeformAsRigidAsPossibleEnergy::Smoothed && + iter > 0 && n_nbs > 0) { + S = 2 * S + + (4 * smoothed_alpha * surface_area / n_nbs) * + R.transpose(); + } + Eigen::JacobiSVD svd( + S, Eigen::ComputeFullU | Eigen::ComputeFullV); + Eigen::Matrix3d U = svd.matrixU(); + Eigen::Matrix3d V = svd.matrixV(); + Eigen::Vector3d D(1, 1, + (V * U.transpose()).determinant()); + // ensure rotation: + // http://graphics.stanford.edu/~smr/ICP/comparison/eggert_comparison_mva97.pdf + Rs[i] = V * D.asDiagonal() * U.transpose(); + if (Rs[i].determinant() <= 0) { + utility::LogError( + "something went wrong with updating R"); + } } - n_nbs++; - } - if (energy_model == DeformAsRigidAsPossibleEnergy::Smoothed && - iter > 0 && n_nbs > 0) { - S = 2 * S + (4 * smoothed_alpha * surface_area / n_nbs) - * R.transpose(); - } - Eigen::JacobiSVD svd( - S, Eigen::ComputeFullU | Eigen::ComputeFullV); - Eigen::Matrix3d U = svd.matrixU(); - Eigen::Matrix3d V = svd.matrixV(); - Eigen::Vector3d D(1, 1, (V * U.transpose()).determinant()); - // ensure rotation: - // http://graphics.stanford.edu/~smr/ICP/comparison/eggert_comparison_mva97.pdf - Rs[i] = V * D.asDiagonal() * U.transpose(); - if (Rs[i].determinant() <= 0) { - utility::LogError("something went wrong with updating R"); - } - } - }); - - tbb::parallel_for(tbb::blocked_range( - 0, vertices_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range){ - for (int i = range.begin(); i < range.end(); ++i) { - // Update Positions - Eigen::Vector3d bi(0, 0, 0); - if (constraints.count(i) > 0) { - bi = constraints[i]; - } else { - for (int j : prime->adjacency_list_[i]) { - double w = edge_weights[GetOrderedEdge(i, j)]; - bi += w / 2 * - ((Rs[i] + Rs[j]) * (vertices_[i] - vertices_[j])); + }); + + tbb::parallel_for( + tbb::blocked_range(0, vertices_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + // Update Positions + Eigen::Vector3d bi(0, 0, 0); + if (constraints.count(i) > 0) { + bi = constraints[i]; + } else { + for (int j : prime->adjacency_list_[i]) { + double w = edge_weights[GetOrderedEdge(i, j)]; + bi += w / 2 * + ((Rs[i] + Rs[j]) * + (vertices_[i] - vertices_[j])); + } + } + b[0](i) = bi(0); + b[1](i) = bi(1); + b[2](i) = bi(2); } - } - b[0](i) = bi(0); - b[1](i) = bi(1); - b[2](i) = bi(2); - } - }); - - tbb::parallel_for(tbb::blocked_range(0, 3, 1), - [&](const tbb::blocked_range& range){ - for (int comp = range.begin(); comp < range.end(); ++comp) { - Eigen::VectorXd p_prime = solver.solve(b[comp]); - if (solver.info() != Eigen::Success) { - utility::LogError("Cholesky solve failed"); - } - for (int i = 0; i < int(vertices_.size()); ++i) { - prime->vertices_[i](comp) = p_prime(i); - } - } - }); - + }); + + tbb::parallel_for( + tbb::blocked_range(0, 3, 1), + [&](const tbb::blocked_range& range) { + for (int comp = range.begin(); comp < range.end(); ++comp) { + Eigen::VectorXd p_prime = solver.solve(b[comp]); + if (solver.info() != Eigen::Success) { + utility::LogError("Cholesky solve failed"); + } + for (int i = 0; i < int(vertices_.size()); ++i) { + prime->vertices_[i](comp) = p_prime(i); + } + } + }); // Compute energy and log double energy = 0; diff --git a/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp b/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp index e137fa57933..535f0295b84 100644 --- a/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp +++ b/cpp/open3d/io/sensor/azure_kinect/AzureKinectSensor.cpp @@ -9,10 +9,9 @@ #include #include -#include - -#include #include +#include +#include #include @@ -164,18 +163,21 @@ void ConvertBGRAToRGB(geometry::Image &bgra, geometry::Image &rgb) { } tbb::parallel_for(tbb::blocked_range2d( - 0, bgra.height_, utility::DefaultGrainSizeTBB2D(), - 0, bgra.width_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range2d& range){ - for (int v = range.rows().begin(); v < range.rows().end(); ++v) { - for (int u = range.cols().begin(); u < range.cols().end(); ++u) { - for (int c = 0; c < 3; ++c) { - *rgb.PointerAt(u, v, c) = - *bgra.PointerAt(u, v, 2 - c); - } - } - } - }); + 0, bgra.height_, utility::DefaultGrainSizeTBB2D(), + 0, bgra.width_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range2d &range) { + for (int v = range.rows().begin(); + v < range.rows().end(); ++v) { + for (int u = range.cols().begin(); + u < range.cols().end(); ++u) { + for (int c = 0; c < 3; ++c) { + *rgb.PointerAt(u, v, c) = + *bgra.PointerAt(u, v, + 2 - c); + } + } + } + }); } bool AzureKinectSensor::PrintFirmware(k4a_device_t device) { diff --git a/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h b/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h index 68e31e43e9a..a8a0617ded3 100644 --- a/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h +++ b/cpp/open3d/ml/impl/sparse_conv/SparseConvBackpropFilter.h @@ -7,11 +7,11 @@ #pragma once -#include - #include #include +#include + namespace open3d { namespace ml { namespace impl { diff --git a/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h b/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h index 6266a178afb..db54d4be9c8 100644 --- a/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h +++ b/cpp/open3d/ml/impl/sparse_conv/SparseConvTransposeBackpropFilter.h @@ -7,11 +7,11 @@ #pragma once -#include - #include #include +#include + namespace open3d { namespace ml { namespace impl { diff --git a/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp b/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp index 10539c4a12b..91d00a6b83d 100644 --- a/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp +++ b/cpp/open3d/pipelines/color_map/ColorMapUtils.cpp @@ -7,9 +7,9 @@ #include "open3d/pipelines/color_map/ColorMapUtils.h" +#include #include #include -#include #include "open3d/camera/PinholeCameraTrajectory.h" #include "open3d/geometry/Image.h" @@ -133,49 +133,55 @@ CreateVertexAndImageVisibility( std::vector> visibility_image_to_vertex; visibility_image_to_vertex.resize(n_camera); - tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + tbb::parallel_for( + tbb::blocked_range(0, n_camera, 1), [&](const tbb::blocked_range& range) { - for (int camera_id = range.begin(); - camera_id < range.end(); ++camera_id) { - for (int vertex_id = 0; vertex_id < n_vertex; vertex_id++) { - Eigen::Vector3d X = mesh.vertices_[vertex_id]; - float u, v, d; - std::tie(u, v, d) = Project3DPointAndGetUVDepth( - X, camera_trajectory.parameters_[camera_id]); - int u_d = int(round(u)); - int v_d = int(round(v)); - // Skip if vertex in image boundary. - if (d < 0.0 || - !images_depth[camera_id].TestImageBoundary(u_d, v_d)) { - continue; - } - // Skip if vertex's depth is too large (e.g. background). - float d_sensor = - *images_depth[camera_id].PointerAt(u_d, v_d); - if (d_sensor > maximum_allowable_depth) { - continue; - } - // Check depth boundary mask. If a vertex is located at the boundary - // of an object, its color will be highly diverse from different - // viewing angles. - if (*images_mask[camera_id].PointerAt(u_d, v_d) == 255) { - continue; - } - // Check depth errors. - if (std::fabs(d - d_sensor) >= - depth_threshold_for_visibility_check) { - continue; + for (int camera_id = range.begin(); camera_id < range.end(); + ++camera_id) { + for (int vertex_id = 0; vertex_id < n_vertex; vertex_id++) { + Eigen::Vector3d X = mesh.vertices_[vertex_id]; + float u, v, d; + std::tie(u, v, d) = Project3DPointAndGetUVDepth( + X, camera_trajectory.parameters_[camera_id]); + int u_d = int(round(u)); + int v_d = int(round(v)); + // Skip if vertex in image boundary. + if (d < 0.0 || + !images_depth[camera_id].TestImageBoundary(u_d, + v_d)) { + continue; + } + // Skip if vertex's depth is too large (e.g. + // background). + float d_sensor = + *images_depth[camera_id].PointerAt(u_d, + v_d); + if (d_sensor > maximum_allowable_depth) { + continue; + } + // Check depth boundary mask. If a vertex is located at + // the boundary of an object, its color will be highly + // diverse from different viewing angles. + if (*images_mask[camera_id].PointerAt( + u_d, v_d) == 255) { + continue; + } + // Check depth errors. + if (std::fabs(d - d_sensor) >= + depth_threshold_for_visibility_check) { + continue; + } + visibility_image_to_vertex[camera_id].push_back( + vertex_id); + } } - visibility_image_to_vertex[camera_id].push_back(vertex_id); - } - } - }); + }); // visibility_vertex_to_image[v]: cameras that can see vertex v. std::vector> visibility_vertex_to_image; visibility_vertex_to_image.resize(n_vertex); for (int i = 0; i < n_camera; ++i) { - for (int vidx: visibility_image_to_vertex[i]) { + for (int vidx : visibility_image_to_vertex[i]) { visibility_vertex_to_image[vidx].emplace_back(i); } } @@ -203,39 +209,42 @@ void SetProxyIntensityForVertex( auto n_vertex = mesh.vertices_.size(); proxy_intensity.resize(n_vertex); - tbb::parallel_for(tbb::blocked_range( - 0, n_vertex, utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, n_vertex, + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - proxy_intensity[i] = 0.0; - float sum = 0.0; - for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); - iter++) { - int j = visibility_vertex_to_image[i][iter]; - float gray; - bool valid = false; - if (warping_fields.has_value()) { - std::tie(valid, gray) = QueryImageIntensity( - images_gray[j], warping_fields.value()[j], - mesh.vertices_[i], camera_trajectory.parameters_[j], - utility::nullopt, image_boundary_margin); - } else { - std::tie(valid, gray) = QueryImageIntensity( - images_gray[j], utility::nullopt, mesh.vertices_[i], - camera_trajectory.parameters_[j], utility::nullopt, - image_boundary_margin); - } + for (std::size_t i = range.begin(); i < range.end(); ++i) { + proxy_intensity[i] = 0.0; + float sum = 0.0; + for (size_t iter = 0; + iter < visibility_vertex_to_image[i].size(); iter++) { + int j = visibility_vertex_to_image[i][iter]; + float gray; + bool valid = false; + if (warping_fields.has_value()) { + std::tie(valid, gray) = QueryImageIntensity( + images_gray[j], warping_fields.value()[j], + mesh.vertices_[i], + camera_trajectory.parameters_[j], + utility::nullopt, image_boundary_margin); + } else { + std::tie(valid, gray) = QueryImageIntensity( + images_gray[j], utility::nullopt, + mesh.vertices_[i], + camera_trajectory.parameters_[j], + utility::nullopt, image_boundary_margin); + } - if (valid) { - sum += 1.0; - proxy_intensity[i] += gray; + if (valid) { + sum += 1.0; + proxy_intensity[i] += gray; + } + } + if (sum > 0) { + proxy_intensity[i] /= sum; + } } - } - if (sum > 0) { - proxy_intensity[i] /= sum; - } - } - }); + }); } void SetGeometryColorAverage( @@ -252,70 +261,76 @@ void SetGeometryColorAverage( tbb::concurrent_vector valid_vertices; tbb::concurrent_vector invalid_vertices; - tbb::parallel_for(tbb::blocked_range( - 0, n_vertex, utility::DefaultGrainSizeTBB()), + tbb::parallel_for( + tbb::blocked_range(0, n_vertex, + utility::DefaultGrainSizeTBB()), [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - mesh.vertex_colors_[i] = Eigen::Vector3d::Zero(); - double sum = 0.0; - for (size_t iter = 0; iter < visibility_vertex_to_image[i].size(); - iter++) { - int j = visibility_vertex_to_image[i][iter]; - uint8_t r_temp, g_temp, b_temp; - bool valid = false; - utility::optional optional_warping_field; - if (warping_fields.has_value()) { - optional_warping_field = warping_fields.value()[j]; - } else { - optional_warping_field = utility::nullopt; - } - std::tie(valid, r_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, - mesh.vertices_[i],camera_trajectory.parameters_[j], - 0, image_boundary_margin); - std::tie(valid, g_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, - mesh.vertices_[i], camera_trajectory.parameters_[j], - 1, image_boundary_margin); - std::tie(valid, b_temp) = QueryImageIntensity( - images_color[j], optional_warping_field, - mesh.vertices_[i], camera_trajectory.parameters_[j], - 2, image_boundary_margin); - float r = (float)r_temp / 255.0f; - float g = (float)g_temp / 255.0f; - float b = (float)b_temp / 255.0f; - if (valid) { - mesh.vertex_colors_[i] += Eigen::Vector3d(r, g, b); - sum += 1.0; + for (std::size_t i = range.begin(); i < range.end(); ++i) { + mesh.vertex_colors_[i] = Eigen::Vector3d::Zero(); + double sum = 0.0; + for (size_t iter = 0; + iter < visibility_vertex_to_image[i].size(); iter++) { + int j = visibility_vertex_to_image[i][iter]; + uint8_t r_temp, g_temp, b_temp; + bool valid = false; + utility::optional + optional_warping_field; + if (warping_fields.has_value()) { + optional_warping_field = warping_fields.value()[j]; + } else { + optional_warping_field = utility::nullopt; + } + std::tie(valid, r_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i], + camera_trajectory.parameters_[j], 0, + image_boundary_margin); + std::tie(valid, g_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i], + camera_trajectory.parameters_[j], 1, + image_boundary_margin); + std::tie(valid, b_temp) = QueryImageIntensity( + images_color[j], optional_warping_field, + mesh.vertices_[i], + camera_trajectory.parameters_[j], 2, + image_boundary_margin); + float r = (float)r_temp / 255.0f; + float g = (float)g_temp / 255.0f; + float b = (float)b_temp / 255.0f; + if (valid) { + mesh.vertex_colors_[i] += Eigen::Vector3d(r, g, b); + sum += 1.0; + } + } + if (sum > 0.0) { + mesh.vertex_colors_[i] /= sum; + valid_vertices.push_back(i); + } else { + invalid_vertices.push_back(i); + } } - } - if (sum > 0.0) { - mesh.vertex_colors_[i] /= sum; - valid_vertices.push_back(i); - } else { - invalid_vertices.push_back(i); - } - } - }); + }); if (invisible_vertex_color_knn > 0) { std::shared_ptr valid_mesh = mesh.SelectByIndex( {valid_vertices.begin(), valid_vertices.end()}); geometry::KDTreeFlann kd_tree(*valid_mesh); - tbb::parallel_for_each(invalid_vertices, - [&](std::size_t invalid_vertex){ - std::vector indices; // indices to valid_mesh - std::vector dists; - kd_tree.SearchKNN(mesh.vertices_[invalid_vertex], - invisible_vertex_color_knn, indices, dists); - Eigen::Vector3d new_color(0, 0, 0); - for (const int& index : indices) { - new_color += valid_mesh->vertex_colors_[index]; - } - if (indices.empty()) { - new_color /= static_cast(indices.size()); - } - mesh.vertex_colors_[invalid_vertex] = new_color; - }); + tbb::parallel_for_each( + invalid_vertices, [&](std::size_t invalid_vertex) { + std::vector indices; // indices to valid_mesh + std::vector dists; + kd_tree.SearchKNN(mesh.vertices_[invalid_vertex], + invisible_vertex_color_knn, indices, + dists); + Eigen::Vector3d new_color(0, 0, 0); + for (const int& index : indices) { + new_color += valid_mesh->vertex_colors_[index]; + } + if (indices.empty()) { + new_color /= static_cast(indices.size()); + } + mesh.vertex_colors_[invalid_vertex] = new_color; + }); } } diff --git a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp index d93cc16f4b4..7a0861bbce7 100644 --- a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp @@ -7,13 +7,13 @@ #include "open3d/pipelines/color_map/NonRigidOptimizer.h" -#include -#include - #include #include #include +#include +#include + #include "open3d/io/ImageIO.h" #include "open3d/io/ImageWarpingFieldIO.h" #include "open3d/io/PinholeCameraTrajectoryIO.h" @@ -47,12 +47,14 @@ static std::vector CreateWarpingFields( return fields; } -template +template struct JTJandJTrNonRigidReducer { // Global Data - using FuncType = std::function; + using FuncType = + std::function; FuncType& f; // Local Data MatOutType JTJ; @@ -60,11 +62,13 @@ struct JTJandJTrNonRigidReducer { double r2_sum = 0.0; JTJandJTrNonRigidReducer(FuncType& f_, int nonrigidval) - : f(f_), JTJ(MatOutType::Zero(6 + nonrigidval, 6 + nonrigidval)), - JTr(VecOutType::Zero(6 + nonrigidval)) {}; + : f(f_), + JTJ(MatOutType::Zero(6 + nonrigidval, 6 + nonrigidval)), + JTr(VecOutType::Zero(6 + nonrigidval)){}; JTJandJTrNonRigidReducer(JTJandJTrNonRigidReducer& o, tbb::split) - : f(o.f), JTJ(MatOutType::Zero(o.JTJ.rows(), o.JTJ.cols())), + : f(o.f), + JTJ(MatOutType::Zero(o.JTJ.rows(), o.JTJ.cols())), JTr(VecOutType::Zero(o.JTr.rows(), o.JTr.cols())) {} void operator()(const tbb::blocked_range& range) { @@ -112,13 +116,17 @@ static std::tuple ComputeJTJandJTrNonRigid( int nonrigidval, bool verbose /*=true*/) { JTJandJTrNonRigidReducer reducer(f, nonrigidval); - tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, - utility::DefaultGrainSizeTBB()), reducer); + VecInTypeInt> + reducer(f, nonrigidval); + tbb::parallel_reduce( + tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), + reducer); auto out = std::move(reducer).as_tuple(); if (verbose) { utility::LogDebug("Residual : {:.2e} (# of elements : {:d})", - std::get<2>(out) / (double)iteration_num, iteration_num); + std::get<2>(out) / (double)iteration_num, + iteration_num); } return out; } @@ -228,7 +236,8 @@ inline void atomic_sum(std::atomic& total, const double& val) { total.fetch_add(val); #else double expected = total; - while(!total.compare_exchange_weak(expected, expected + val)); + while (!total.compare_exchange_weak(expected, expected + val)) + ; #endif } @@ -316,72 +325,81 @@ RunNonRigidOptimizer(const geometry::TriangleMesh& mesh, utility::LogDebug("[Iteration {:04d}] ", itr + 1); std::atomic residual = 0.0; std::atomic residual_reg = 0.0; - tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + tbb::parallel_for( + tbb::blocked_range(0, n_camera, 1), [&](const tbb::blocked_range& range) { - for (int c = range.begin(); c < range.end(); ++c) { - int nonrigidval = warping_fields[c].anchor_w_ * - warping_fields[c].anchor_h_ * 2; - double rr_reg = 0.0; - - Eigen::Matrix4d pose; - pose = opt_camera_trajectory.parameters_[c].extrinsic_; - - auto intrinsic = opt_camera_trajectory.parameters_[c] - .intrinsic_.intrinsic_matrix_; - auto extrinsic = opt_camera_trajectory. - parameters_[c].extrinsic_; - Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); - intr.block<3, 3>(0, 0) = intrinsic; - intr(3, 3) = 1.0; - - auto f_lambda = [&](int i, Eigen::Vector14d& J_r, double& r, - Eigen::Vector14i& pattern) { - ComputeJacobianAndResidualNonRigid( - i, J_r, r, pattern, opt_mesh, proxy_intensity, - images_gray[c], images_dx[c], images_dy[c], - warping_fields[c], intr, extrinsic, - visibility_image_to_vertex[c], - option.image_boundary_margin_); - }; - Eigen::MatrixXd JTJ; - Eigen::VectorXd JTr; - double r2; - std::tie(JTJ, JTr, r2) = - ComputeJTJandJTrNonRigid( - f_lambda, int(visibility_image_to_vertex[c].size()), + for (int c = range.begin(); c < range.end(); ++c) { + int nonrigidval = warping_fields[c].anchor_w_ * + warping_fields[c].anchor_h_ * 2; + double rr_reg = 0.0; + + Eigen::Matrix4d pose; + pose = opt_camera_trajectory.parameters_[c].extrinsic_; + + auto intrinsic = opt_camera_trajectory.parameters_[c] + .intrinsic_.intrinsic_matrix_; + auto extrinsic = + opt_camera_trajectory.parameters_[c].extrinsic_; + Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); + intr.block<3, 3>(0, 0) = intrinsic; + intr(3, 3) = 1.0; + + auto f_lambda = [&](int i, Eigen::Vector14d& J_r, + double& r, + Eigen::Vector14i& pattern) { + ComputeJacobianAndResidualNonRigid( + i, J_r, r, pattern, opt_mesh, + proxy_intensity, images_gray[c], + images_dx[c], images_dy[c], + warping_fields[c], intr, extrinsic, + visibility_image_to_vertex[c], + option.image_boundary_margin_); + }; + Eigen::MatrixXd JTJ; + Eigen::VectorXd JTr; + double r2; + std::tie(JTJ, JTr, r2) = ComputeJTJandJTrNonRigid< + Eigen::Vector14d, Eigen::Vector14i, + Eigen::MatrixXd, Eigen::VectorXd>( + f_lambda, + int(visibility_image_to_vertex[c].size()), nonrigidval, false); - double weight = option.non_rigid_anchor_point_weight_ * - visibility_image_to_vertex[c].size() / n_vertex; - for (int j = 0; j < nonrigidval; j++) { - double r = weight * (warping_fields[c].flow_(j) - - warping_fields_init[c].flow_(j)); - JTJ(6 + j, 6 + j) += weight * weight; - JTr(6 + j) += weight * r; - rr_reg += r * r; - } - - bool success; - Eigen::VectorXd result; - std::tie(success, result) = utility::SolveLinearSystemPSD( - JTJ, -JTr, /*prefer_sparse=*/false, - /*check_symmetric=*/false, - /*check_det=*/false, /*check_psd=*/false); - Eigen::Vector6d result_pose; - result_pose << result.block(0, 0, 6, 1); - auto delta = utility::TransformVector6dToMatrix4d(result_pose); - pose = delta * pose; - - for (int j = 0; j < nonrigidval; j++) { - warping_fields[c].flow_(j) += result(6 + j); - } - opt_camera_trajectory.parameters_[c].extrinsic_ = pose; - - atomic_sum(residual, r2); - atomic_sum(residual_reg, rr_reg); - } - }); + double weight = option.non_rigid_anchor_point_weight_ * + visibility_image_to_vertex[c].size() / + n_vertex; + for (int j = 0; j < nonrigidval; j++) { + double r = + weight * (warping_fields[c].flow_(j) - + warping_fields_init[c].flow_(j)); + JTJ(6 + j, 6 + j) += weight * weight; + JTr(6 + j) += weight * r; + rr_reg += r * r; + } + + bool success; + Eigen::VectorXd result; + std::tie(success, result) = + utility::SolveLinearSystemPSD( + JTJ, -JTr, /*prefer_sparse=*/false, + /*check_symmetric=*/false, + /*check_det=*/false, + /*check_psd=*/false); + Eigen::Vector6d result_pose; + result_pose << result.block(0, 0, 6, 1); + auto delta = utility::TransformVector6dToMatrix4d( + result_pose); + pose = delta * pose; + + for (int j = 0; j < nonrigidval; j++) { + warping_fields[c].flow_(j) += result(6 + j); + } + opt_camera_trajectory.parameters_[c].extrinsic_ = pose; + + atomic_sum(residual, r2); + atomic_sum(residual_reg, rr_reg); + } + }); utility::LogDebug("Residual error : {:.6f}, reg : {:.6f}", residual, residual_reg); SetProxyIntensityForVertex(opt_mesh, images_gray, warping_fields, diff --git a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp index f507e996098..91636d39e50 100644 --- a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp @@ -7,11 +7,11 @@ #include "open3d/pipelines/color_map/RigidOptimizer.h" +#include + #include #include -#include - #include "open3d/io/ImageIO.h" #include "open3d/io/PinholeCameraTrajectoryIO.h" #include "open3d/io/TriangleMeshIO.h" @@ -77,7 +77,8 @@ inline void atomic_sum(std::atomic& total, const double& val) { total.fetch_add(val); #else double expected = total; - while(!total.compare_exchange_weak(expected, expected + val)); + while (!total.compare_exchange_weak(expected, expected + val)) + ; #endif } @@ -157,46 +158,50 @@ RunRigidOptimizer(const geometry::TriangleMesh& mesh, utility::LogDebug("[Iteration {:04d}] ", itr + 1); std::atomic residual = 0.0; total_num_ = 0; - tbb::parallel_for(tbb::blocked_range(0, n_camera, 1), + tbb::parallel_for( + tbb::blocked_range(0, n_camera, 1), [&](const tbb::blocked_range& range) { - for (int c = range.begin(); c < range.end(); ++c) { - Eigen::Matrix4d pose; - pose = opt_camera_trajectory.parameters_[c].extrinsic_; - - auto intrinsic = opt_camera_trajectory. - parameters_[c].intrinsic_.intrinsic_matrix_; - auto extrinsic = opt_camera_trajectory. - parameters_[c].extrinsic_; - Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); - intr.block<3, 3>(0, 0) = intrinsic; - intr(3, 3) = 1.0; - - auto f_lambda = [&](int i, Eigen::Vector6d& J_r, - double& r, double& w) { - w = 1.0; // Dummy. - ComputeJacobianAndResidualRigid( - i, J_r, r, w, opt_mesh, proxy_intensity, - images_gray[c], images_dx[c], images_dy[c], intr, - extrinsic, visibility_image_to_vertex[c], - option.image_boundary_margin_); - }; - Eigen::Matrix6d JTJ; - Eigen::Vector6d JTr; - double r2; - std::tie(JTJ, JTr, r2) = utility::ComputeJTJandJTr< - Eigen::Matrix6d, Eigen::Vector6d>(f_lambda, - int(visibility_image_to_vertex[c].size()), false); - - Eigen::Matrix4d delta; - std::tie(std::ignore, delta) = - utility::SolveJacobianSystemAndObtainExtrinsicMatrix( - JTJ, JTr); - pose = delta * pose; - opt_camera_trajectory.parameters_[c].extrinsic_ = pose; - atomic_sum(residual, r2); - total_num_ += int(visibility_image_to_vertex[c].size()); - } - }); + for (int c = range.begin(); c < range.end(); ++c) { + Eigen::Matrix4d pose; + pose = opt_camera_trajectory.parameters_[c].extrinsic_; + + auto intrinsic = opt_camera_trajectory.parameters_[c] + .intrinsic_.intrinsic_matrix_; + auto extrinsic = + opt_camera_trajectory.parameters_[c].extrinsic_; + Eigen::Matrix4d intr = Eigen::Matrix4d::Zero(); + intr.block<3, 3>(0, 0) = intrinsic; + intr(3, 3) = 1.0; + + auto f_lambda = [&](int i, Eigen::Vector6d& J_r, + double& r, double& w) { + w = 1.0; // Dummy. + ComputeJacobianAndResidualRigid( + i, J_r, r, w, opt_mesh, proxy_intensity, + images_gray[c], images_dx[c], images_dy[c], + intr, extrinsic, + visibility_image_to_vertex[c], + option.image_boundary_margin_); + }; + Eigen::Matrix6d JTJ; + Eigen::Vector6d JTr; + double r2; + std::tie(JTJ, JTr, r2) = utility::ComputeJTJandJTr< + Eigen::Matrix6d, Eigen::Vector6d>( + f_lambda, + int(visibility_image_to_vertex[c].size()), + false); + + Eigen::Matrix4d delta; + std::tie(std::ignore, delta) = utility:: + SolveJacobianSystemAndObtainExtrinsicMatrix( + JTJ, JTr); + pose = delta * pose; + opt_camera_trajectory.parameters_[c].extrinsic_ = pose; + atomic_sum(residual, r2); + total_num_ += int(visibility_image_to_vertex[c].size()); + } + }); if (total_num_ > 0) { utility::LogDebug("Residual error : {:.6f} (avg : {:.6f})", residual, residual / total_num_); diff --git a/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp b/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp index 6698fc149d5..a4d27970b4c 100644 --- a/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp +++ b/cpp/open3d/pipelines/integration/UniformTSDFVolume.cpp @@ -7,14 +7,14 @@ #include "open3d/pipelines/integration/UniformTSDFVolume.h" +#include +#include +#include + #include #include #include -#include -#include -#include - #include "open3d/geometry/VoxelGrid.h" #include "open3d/pipelines/integration/MarchingCubesConst.h" #include "open3d/utility/Helper.h" @@ -261,28 +261,34 @@ std::shared_ptr UniformTSDFVolume::ExtractVoxelGrid() voxel_grid->voxel_size_ = voxel_length_; voxel_grid->origin_ = origin_; - tbb::parallel_for(tbb::blocked_range3d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range3d& range){ - for (int x = range.pages().begin(); x < range.pages().end(); x++) { - for (int y = range.rows().begin(); y < range.rows().end(); y++) { - for (int z = range.cols().begin(); z < range.cols().end(); z++){ - const int ind = IndexOf(x, y, z); - const float w = voxels_[ind].weight_; - const float f = voxels_[ind].tsdf_; - if (w != 0.0f && f < 0.98f && f >= -0.98f) { - double c = (f + 1.0) * 0.5; - Eigen::Vector3d color = Eigen::Vector3d(c, c, c); - Eigen::Vector3i index = Eigen::Vector3i(x, y, z); - voxel_grid->voxels_[index] = - geometry::Voxel(index, color); + tbb::parallel_for( + tbb::blocked_range3d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), 0, + resolution_, utility::DefaultGrainSizeTBB2D(), 0, + resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d &range) { + for (int x = range.pages().begin(); x < range.pages().end(); + x++) { + for (int y = range.rows().begin(); y < range.rows().end(); + y++) { + for (int z = range.cols().begin(); + z < range.cols().end(); z++) { + const int ind = IndexOf(x, y, z); + const float w = voxels_[ind].weight_; + const float f = voxels_[ind].tsdf_; + if (w != 0.0f && f < 0.98f && f >= -0.98f) { + double c = (f + 1.0) * 0.5; + Eigen::Vector3d color = + Eigen::Vector3d(c, c, c); + Eigen::Vector3i index = + Eigen::Vector3i(x, y, z); + voxel_grid->voxels_[index] = + geometry::Voxel(index, color); + } + } } } - } - } - }); + }); return voxel_grid; } @@ -291,21 +297,25 @@ std::vector UniformTSDFVolume::ExtractVolumeTSDF() const { sharedvoxels_.resize(voxel_num_); tbb::parallel_for(tbb::blocked_range3d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range3d& range){ - for (int x = range.pages().begin(); x < range.pages().end(); x++) { - for (int y = range.rows().begin(); y < range.rows().end(); y++) { - for (int z = range.cols().begin(); z < range.cols().end(); z++){ - const int ind = IndexOf(x, y, z); - const float f = voxels_[ind].tsdf_; - const float w = voxels_[ind].weight_; - sharedvoxels_[ind] = Eigen::Vector2d(f, w); - } - } - } - }); + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d &range) { + for (int x = range.pages().begin(); + x < range.pages().end(); x++) { + for (int y = range.rows().begin(); + y < range.rows().end(); y++) { + for (int z = range.cols().begin(); + z < range.cols().end(); z++) { + const int ind = IndexOf(x, y, z); + const float f = voxels_[ind].tsdf_; + const float w = voxels_[ind].weight_; + sharedvoxels_[ind] = + Eigen::Vector2d(f, w); + } + } + } + }); return sharedvoxels_; } @@ -314,57 +324,67 @@ std::vector UniformTSDFVolume::ExtractVolumeColor() const { sharedcolors_.resize(voxel_num_); tbb::parallel_for(tbb::blocked_range3d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range3d& range){ - for (int x = range.pages().begin(); x < range.pages().end(); x++) { - for (int y = range.rows().begin(); y < range.rows().end(); y++) { - for (int z = range.cols().begin(); z < range.cols().end(); z++){ - const int ind = IndexOf(x, y, z); - sharedcolors_[ind] = voxels_[ind].color_; - } - } - } - }); + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d &range) { + for (int x = range.pages().begin(); + x < range.pages().end(); x++) { + for (int y = range.rows().begin(); + y < range.rows().end(); y++) { + for (int z = range.cols().begin(); + z < range.cols().end(); z++) { + const int ind = IndexOf(x, y, z); + sharedcolors_[ind] = voxels_[ind].color_; + } + } + } + }); return sharedcolors_; } void UniformTSDFVolume::InjectVolumeTSDF( const std::vector &sharedvoxels) { tbb::parallel_for(tbb::blocked_range3d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range3d& range){ - for (int x = range.pages().begin(); x < range.pages().end(); x++) { - for (int y = range.rows().begin(); y < range.rows().end(); y++) { - for (int z = range.cols().begin(); z < range.cols().end(); z++){ - const int ind = IndexOf(x, y, z); - voxels_[ind].tsdf_ = sharedvoxels[ind](0); - voxels_[ind].weight_ = sharedvoxels[ind](1); - } - } - } - }); + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d &range) { + for (int x = range.pages().begin(); + x < range.pages().end(); x++) { + for (int y = range.rows().begin(); + y < range.rows().end(); y++) { + for (int z = range.cols().begin(); + z < range.cols().end(); z++) { + const int ind = IndexOf(x, y, z); + voxels_[ind].tsdf_ = sharedvoxels[ind](0); + voxels_[ind].weight_ = + sharedvoxels[ind](1); + } + } + } + }); } void UniformTSDFVolume::InjectVolumeColor( const std::vector &sharedcolors) { tbb::parallel_for(tbb::blocked_range3d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range3d& range){ - for (int x = range.pages().begin(); x < range.pages().end(); x++) { - for (int y = range.rows().begin(); y < range.rows().end(); y++) { - for (int z = range.cols().begin(); z < range.cols().end(); z++){ - const int ind = IndexOf(x, y, z); - voxels_[ind].color_ = sharedcolors[ind]; - } - } - } - }); + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D(), + 0, resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range3d &range) { + for (int x = range.pages().begin(); + x < range.pages().end(); x++) { + for (int y = range.rows().begin(); + y < range.rows().end(); y++) { + for (int z = range.cols().begin(); + z < range.cols().end(); z++) { + const int ind = IndexOf(x, y, z); + voxels_[ind].color_ = sharedcolors[ind]; + } + } + } + }); } void UniformTSDFVolume::IntegrateWithDepthToCameraDistanceMultiplier( @@ -385,75 +405,88 @@ void UniformTSDFVolume::IntegrateWithDepthToCameraDistanceMultiplier( const float safe_width_f = intrinsic.width_ - 0.0001f; const float safe_height_f = intrinsic.height_ - 0.0001f; - tbb::parallel_for(tbb::blocked_range2d( - 0, resolution_, utility::DefaultGrainSizeTBB2D(), - 0, resolution_, utility::DefaultGrainSizeTBB2D()), - [&](const tbb::blocked_range2d& range) { - for (int x = range.rows().begin(); x < range.rows().end(); ++x) { - for (int y = range.cols().begin(); y < range.cols().end(); ++y) { - Eigen::Vector4f pt_3d_homo( - float(half_voxel_length_f + voxel_length_f * x + origin_(0)), - float(half_voxel_length_f + voxel_length_f * y + origin_(1)), - float(half_voxel_length_f + origin_(2)), 1.f); - Eigen::Vector4f pt_camera = extrinsic_f * pt_3d_homo; - for (int z = 0; z < resolution_; z++, - pt_camera(0) += extrinsic_scaled_f(0, 2), - pt_camera(1) += extrinsic_scaled_f(1, 2), - pt_camera(2) += extrinsic_scaled_f(2, 2)) { - // Skip if negative depth after projection - if (pt_camera(2) <= 0) { - continue; - } - // Skip if x-y coordinate not in range - float u_f = pt_camera(0) * fx / pt_camera(2) + cx + 0.5f; - float v_f = pt_camera(1) * fy / pt_camera(2) + cy + 0.5f; - if (!(u_f >= 0.0001f && u_f < safe_width_f && v_f >= 0.0001f && - v_f < safe_height_f)) { - continue; - } - // Skip if negative depth in depth image - int u = (int)u_f; - int v = (int)v_f; - float d = *image.depth_.PointerAt(u, v); - if (d <= 0.0f) { - continue; - } + tbb::parallel_for( + tbb::blocked_range2d( + 0, resolution_, utility::DefaultGrainSizeTBB2D(), 0, + resolution_, utility::DefaultGrainSizeTBB2D()), + [&](const tbb::blocked_range2d &range) { + for (int x = range.rows().begin(); x < range.rows().end(); + ++x) { + for (int y = range.cols().begin(); y < range.cols().end(); + ++y) { + Eigen::Vector4f pt_3d_homo( + float(half_voxel_length_f + voxel_length_f * x + + origin_(0)), + float(half_voxel_length_f + voxel_length_f * y + + origin_(1)), + float(half_voxel_length_f + origin_(2)), 1.f); + Eigen::Vector4f pt_camera = extrinsic_f * pt_3d_homo; + for (int z = 0; z < resolution_; z++, + pt_camera(0) += extrinsic_scaled_f(0, 2), + pt_camera(1) += extrinsic_scaled_f(1, 2), + pt_camera(2) += extrinsic_scaled_f(2, 2)) { + // Skip if negative depth after projection + if (pt_camera(2) <= 0) { + continue; + } + // Skip if x-y coordinate not in range + float u_f = pt_camera(0) * fx / pt_camera(2) + cx + + 0.5f; + float v_f = pt_camera(1) * fy / pt_camera(2) + cy + + 0.5f; + if (!(u_f >= 0.0001f && u_f < safe_width_f && + v_f >= 0.0001f && v_f < safe_height_f)) { + continue; + } + // Skip if negative depth in depth image + int u = (int)u_f; + int v = (int)v_f; + float d = *image.depth_.PointerAt(u, v); + if (d <= 0.0f) { + continue; + } - int v_ind = IndexOf(x, y, z); - float sdf = (d - pt_camera(2)) * - (*depth_to_camera_distance_multiplier - .PointerAt(u, v)); - if (sdf > -sdf_trunc_f) { - // integrate - float tsdf = std::min(1.0f, sdf * sdf_trunc_inv_f); - voxels_[v_ind].tsdf_ = - (voxels_[v_ind].tsdf_ * voxels_[v_ind].weight_ + - tsdf) / - (voxels_[v_ind].weight_ + 1.0f); - if (color_type_ == TSDFVolumeColorType::RGB8) { - const uint8_t *rgb = - image.color_.PointerAt(u, v, 0); - Eigen::Vector3d rgb_f(rgb[0], rgb[1], rgb[2]); - voxels_[v_ind].color_ = - (voxels_[v_ind].color_ * - voxels_[v_ind].weight_ + - rgb_f) / - (voxels_[v_ind].weight_ + 1.0f); - } else if (color_type_ == TSDFVolumeColorType::Gray32) { - const float *intensity = - image.color_.PointerAt(u, v, 0); - voxels_[v_ind].color_ = - (voxels_[v_ind].color_.array() * - voxels_[v_ind].weight_ + - (*intensity)) / - (voxels_[v_ind].weight_ + 1.0f); + int v_ind = IndexOf(x, y, z); + float sdf = (d - pt_camera(2)) * + (*depth_to_camera_distance_multiplier + .PointerAt(u, v)); + if (sdf > -sdf_trunc_f) { + // integrate + float tsdf = + std::min(1.0f, sdf * sdf_trunc_inv_f); + voxels_[v_ind].tsdf_ = + (voxels_[v_ind].tsdf_ * + voxels_[v_ind].weight_ + + tsdf) / + (voxels_[v_ind].weight_ + 1.0f); + if (color_type_ == TSDFVolumeColorType::RGB8) { + const uint8_t *rgb = + image.color_.PointerAt( + u, v, 0); + Eigen::Vector3d rgb_f(rgb[0], rgb[1], + rgb[2]); + voxels_[v_ind].color_ = + (voxels_[v_ind].color_ * + voxels_[v_ind].weight_ + + rgb_f) / + (voxels_[v_ind].weight_ + 1.0f); + } else if (color_type_ == + TSDFVolumeColorType::Gray32) { + const float *intensity = + image.color_.PointerAt(u, v, + 0); + voxels_[v_ind].color_ = + (voxels_[v_ind].color_.array() * + voxels_[v_ind].weight_ + + (*intensity)) / + (voxels_[v_ind].weight_ + 1.0f); + } + voxels_[v_ind].weight_ += 1.0f; + } } - voxels_[v_ind].weight_ += 1.0f; } } - } - } - }); + }); } Eigen::Vector3d UniformTSDFVolume::GetNormalAt(const Eigen::Vector3d &p) { diff --git a/cpp/open3d/pipelines/odometry/Odometry.cpp b/cpp/open3d/pipelines/odometry/Odometry.cpp index d326c74f283..a5a77fd0f4a 100644 --- a/cpp/open3d/pipelines/odometry/Odometry.cpp +++ b/cpp/open3d/pipelines/odometry/Odometry.cpp @@ -17,8 +17,8 @@ #include "open3d/geometry/RGBDImage.h" #include "open3d/pipelines/odometry/RGBDOdometryJacobian.h" #include "open3d/utility/Eigen.h" -#include "open3d/utility/Timer.h" #include "open3d/utility/Parallel.h" +#include "open3d/utility/Timer.h" namespace open3d { namespace pipelines { @@ -26,28 +26,34 @@ namespace odometry { struct CorrespondenceReduction { // Global - const geometry::Image& depth_s; - const geometry::Image& depth_t; - const Eigen::Matrix3d& KRK_inv; - const Eigen::Vector3d& Kt; + const geometry::Image &depth_s; + const geometry::Image &depth_t; + const Eigen::Matrix3d &KRK_inv; + const Eigen::Vector3d &Kt; const double depth_diff_max; // Local geometry::Image correspondence_map; geometry::Image depth_buffer; - CorrespondenceReduction(const geometry::Image& depth_s_, - const geometry::Image& depth_t_, - const Eigen::Matrix3d& KRK_inv_, - const Eigen::Vector3d& Kt_, + CorrespondenceReduction(const geometry::Image &depth_s_, + const geometry::Image &depth_t_, + const Eigen::Matrix3d &KRK_inv_, + const Eigen::Vector3d &Kt_, const double depth_diff_max_) - : depth_s(depth_s_), depth_t(depth_t_), KRK_inv(KRK_inv_), - Kt(Kt_), depth_diff_max(depth_diff_max_) { + : depth_s(depth_s_), + depth_t(depth_t_), + KRK_inv(KRK_inv_), + Kt(Kt_), + depth_diff_max(depth_diff_max_) { InitializeCorrespondenceMap(); } - CorrespondenceReduction(CorrespondenceReduction& o, tbb::split) - : depth_s(o.depth_s), depth_t(o.depth_t), KRK_inv(o.KRK_inv), - Kt(o.Kt), depth_diff_max(o.depth_diff_max) { + CorrespondenceReduction(CorrespondenceReduction &o, tbb::split) + : depth_s(o.depth_s), + depth_t(o.depth_t), + KRK_inv(o.KRK_inv), + Kt(o.Kt), + depth_diff_max(o.depth_diff_max) { InitializeCorrespondenceMap(); } @@ -83,7 +89,7 @@ struct CorrespondenceReduction { } } - void MergeCorrespondenceMaps(const CorrespondenceReduction& rhs) { + void MergeCorrespondenceMaps(const CorrespondenceReduction &rhs) { for (int v_s = 0; v_s < correspondence_map.height_; v_s++) { for (int u_s = 0; u_s < correspondence_map.width_; u_s++) { int u_t = *rhs.correspondence_map.PointerAt(u_s, v_s, 0); @@ -91,14 +97,14 @@ struct CorrespondenceReduction { if (u_t != -1 && v_t != -1) { float transformed_d_t = *rhs.depth_buffer.PointerAt(u_s, v_s); - AddElementToCorrespondenceMap( - u_s, v_s, u_t, v_t, transformed_d_t); + AddElementToCorrespondenceMap(u_s, v_s, u_t, v_t, + transformed_d_t); } } } } - void operator()(const tbb::blocked_range2d&r) { + void operator()(const tbb::blocked_range2d &r) { for (int v_s = r.rows().begin(); v_s < r.rows().end(); ++v_s) { for (int u_s = r.cols().begin(); u_s < r.cols().end(); u_s++) { double d_s = *depth_s.PointerAt(u_s, v_s); @@ -112,10 +118,9 @@ struct CorrespondenceReduction { v_t < depth_t.height_) { double d_t = *depth_t.PointerAt(u_t, v_t); if (!std::isnan(d_t) && - std::abs(transformed_d_s - d_t) <= - depth_diff_max) { - AddElementToCorrespondenceMap( - u_s, v_s, u_t, v_t, (float)d_s); + std::abs(transformed_d_s - d_t) <= depth_diff_max) { + AddElementToCorrespondenceMap(u_s, v_s, u_t, v_t, + (float)d_s); } } } @@ -123,7 +128,7 @@ struct CorrespondenceReduction { } } - inline void join(const CorrespondenceReduction& rhs) { + inline void join(const CorrespondenceReduction &rhs) { MergeCorrespondenceMaps(rhs); } @@ -169,11 +174,13 @@ CorrespondenceSetPixelWise ComputeCorrespondence( const Eigen::Matrix3d KRK_inv = K * R * K_inv; const Eigen::Vector3d Kt = K * extrinsic.block<3, 1>(0, 3); - CorrespondenceReduction reducer(depth_s, depth_t, - KRK_inv, Kt, option.depth_diff_max_); - tbb::parallel_reduce(tbb::blocked_range2d( - 0, depth_s.height_, utility::DefaultGrainSizeTBB2D(), - 0, depth_s.width_, utility::DefaultGrainSizeTBB2D()), reducer); + CorrespondenceReduction reducer(depth_s, depth_t, KRK_inv, Kt, + option.depth_diff_max_); + tbb::parallel_reduce( + tbb::blocked_range2d( + 0, depth_s.height_, utility::DefaultGrainSizeTBB2D(), 0, + depth_s.width_, utility::DefaultGrainSizeTBB2D()), + reducer); return reducer.correspondences(); } @@ -223,19 +230,19 @@ static std::vector CreateCameraMatrixPyramid( struct InformationMatrixReducer { // Globals - const CorrespondenceSetPixelWise& corres; - const geometry::Image& xyz_t; + const CorrespondenceSetPixelWise &corres; + const geometry::Image &xyz_t; // Locals Eigen::Matrix6d GTG; - InformationMatrixReducer(const CorrespondenceSetPixelWise& corres_, - const geometry::Image& xyz_t_) + InformationMatrixReducer(const CorrespondenceSetPixelWise &corres_, + const geometry::Image &xyz_t_) : corres(corres_), xyz_t(xyz_t_), GTG(Eigen::Matrix6d::Zero()) {} - InformationMatrixReducer(InformationMatrixReducer& o, tbb::split) + InformationMatrixReducer(InformationMatrixReducer &o, tbb::split) : corres(o.corres), xyz_t(o.xyz_t), GTG(Eigen::Matrix6d::Zero()) {} - void operator()(const tbb::blocked_range& range) { + void operator()(const tbb::blocked_range &range) { // write q^* // see http://redwood-data.org/indoor/registration.html // note: I comes first in this implementation @@ -264,9 +271,7 @@ struct InformationMatrixReducer { } } - void join(InformationMatrixReducer& other) { - GTG += other.GTG; - } + void join(InformationMatrixReducer &other) { GTG += other.GTG; } }; static Eigen::Matrix6d CreateInformationMatrix( @@ -283,8 +288,10 @@ static Eigen::Matrix6d CreateInformationMatrix( depth_t, pinhole_camera_intrinsic.intrinsic_matrix_); InformationMatrixReducer reducer(correspondence, *xyz_t.get()); - tbb::parallel_reduce(tbb::blocked_range(0, - correspondence.size(), utility::DefaultGrainSizeTBB()), reducer); + tbb::parallel_reduce( + tbb::blocked_range(0, correspondence.size(), + utility::DefaultGrainSizeTBB()), + reducer); return std::move(reducer.GTG); } diff --git a/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp b/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp index da9dab27a82..5b9f7a8aab7 100644 --- a/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp +++ b/cpp/open3d/pipelines/registration/FastGlobalRegistration.cpp @@ -7,17 +7,18 @@ #include "open3d/pipelines/registration/FastGlobalRegistration.h" -#include #include #include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/pipelines/registration/Feature.h" #include "open3d/pipelines/registration/Registration.h" #include "open3d/utility/Logging.h" -#include "open3d/utility/Random.h" #include "open3d/utility/Parallel.h" +#include "open3d/utility/Random.h" namespace open3d { namespace pipelines { @@ -30,33 +31,39 @@ static std::vector> InitialMatching( std::vector corres_ij(src_features.data_.cols(), -1); std::vector corres_ji(dst_features.data_.cols(), -1); - tbb::parallel_invoke([&](){ - tbb::parallel_for(tbb::blocked_range(0, - src_features.data_.cols(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - std::vector corres_tmp(1); - std::vector dist_tmp(1); - dst_feature_tree.SearchKNN( - Eigen::VectorXd(src_features.data_.col(i)), - 1, corres_tmp, dist_tmp); - corres_ij[i] = corres_tmp[0]; - } - }); - }, [&](){ - tbb::parallel_for(tbb::blocked_range(0, - dst_features.data_.cols(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (int j = range.begin(); j < range.end(); ++j) { - std::vector corres_tmp(1); - std::vector dist_tmp(1); - src_feature_tree.SearchKNN( - Eigen::VectorXd(dst_features.data_.col(j)), - 1, corres_tmp, dist_tmp); - corres_ji[j] = corres_tmp[0]; - } - }); - }); + tbb::parallel_invoke( + [&]() { + tbb::parallel_for( + tbb::blocked_range(0, src_features.data_.cols(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); ++i) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + dst_feature_tree.SearchKNN( + Eigen::VectorXd( + src_features.data_.col(i)), + 1, corres_tmp, dist_tmp); + corres_ij[i] = corres_tmp[0]; + } + }); + }, + [&]() { + tbb::parallel_for( + tbb::blocked_range(0, dst_features.data_.cols(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range& range) { + for (int j = range.begin(); j < range.end(); ++j) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + src_feature_tree.SearchKNN( + Eigen::VectorXd( + dst_features.data_.col(j)), + 1, corres_tmp, dist_tmp); + corres_ji[j] = corres_tmp[0]; + } + }); + }); utility::LogDebug("\t[cross check] "); std::vector> corres_cross; diff --git a/cpp/open3d/pipelines/registration/Feature.cpp b/cpp/open3d/pipelines/registration/Feature.cpp index 73580575b02..61c3ffe6511 100644 --- a/cpp/open3d/pipelines/registration/Feature.cpp +++ b/cpp/open3d/pipelines/registration/Feature.cpp @@ -7,10 +7,10 @@ #include "open3d/pipelines/registration/Feature.h" -#include - #include +#include + #include "open3d/geometry/KDTreeFlann.h" #include "open3d/geometry/PointCloud.h" #include "open3d/utility/Logging.h" @@ -60,38 +60,41 @@ static std::shared_ptr ComputeSPFHFeature( const geometry::KDTreeSearchParam &search_param) { auto feature = std::make_shared(); feature->Resize(33, (int)input.points_.size()); - tbb::parallel_for(tbb::blocked_range( - 0, input.points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - const auto &point = input.points_[i]; - const auto &normal = input.normals_[i]; - std::vector indices; - std::vector distance2; - if (kdtree.Search(point, search_param, indices, distance2) > 1) { - // only compute SPFH feature when a point has neighbors - double hist_incr = 100.0 / (double)(indices.size() - 1); - for (size_t k = 1; k < indices.size(); k++) { - // skip the point itself, compute histogram - auto pf = ComputePairFeatures(point, normal, - input.points_[indices[k]], - input.normals_[indices[k]]); - int h_index = (int)(floor(11 * (pf(0) + M_PI) / (2.0 * M_PI))); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index, i) += hist_incr; - h_index = (int)(floor(11 * (pf(1) + 1.0) * 0.5)); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index + 11, i) += hist_incr; - h_index = (int)(floor(11 * (pf(2) + 1.0) * 0.5)); - if (h_index < 0) h_index = 0; - if (h_index >= 11) h_index = 10; - feature->data_(h_index + 22, i) += hist_incr; + tbb::parallel_for( + tbb::blocked_range(0, input.points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto &point = input.points_[i]; + const auto &normal = input.normals_[i]; + std::vector indices; + std::vector distance2; + if (kdtree.Search(point, search_param, indices, distance2) > + 1) { + // only compute SPFH feature when a point has neighbors + double hist_incr = 100.0 / (double)(indices.size() - 1); + for (size_t k = 1; k < indices.size(); k++) { + // skip the point itself, compute histogram + auto pf = ComputePairFeatures( + point, normal, input.points_[indices[k]], + input.normals_[indices[k]]); + int h_index = (int)(floor(11 * (pf(0) + M_PI) / + (2.0 * M_PI))); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index, i) += hist_incr; + h_index = (int)(floor(11 * (pf(1) + 1.0) * 0.5)); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index + 11, i) += hist_incr; + h_index = (int)(floor(11 * (pf(2) + 1.0) * 0.5)); + if (h_index < 0) h_index = 0; + if (h_index >= 11) h_index = 10; + feature->data_(h_index + 22, i) += hist_incr; + } + } } - } - } - }); + }); return feature; } @@ -110,39 +113,41 @@ std::shared_ptr ComputeFPFHFeature( utility::LogError("Internal error: SPFH feature is nullptr."); } - tbb::parallel_for(tbb::blocked_range( - 0, input.points_.size(), utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - const auto &point = input.points_[i]; - std::vector indices; - std::vector distance2; - if (kdtree.Search(point, search_param, indices, distance2) > 1) { - double sum[3] = {0.0, 0.0, 0.0}; - for (size_t k = 1; k < indices.size(); k++) { - // skip the point itself - double dist = distance2[k]; - if (dist == 0.0) continue; - for (int j = 0; j < 33; j++) { - double val = spfh->data_(j, indices[k]) / dist; - sum[j / 11] += val; - feature->data_(j, i) += val; + tbb::parallel_for( + tbb::blocked_range(0, input.points_.size(), + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto &point = input.points_[i]; + std::vector indices; + std::vector distance2; + if (kdtree.Search(point, search_param, indices, distance2) > + 1) { + double sum[3] = {0.0, 0.0, 0.0}; + for (size_t k = 1; k < indices.size(); k++) { + // skip the point itself + double dist = distance2[k]; + if (dist == 0.0) continue; + for (int j = 0; j < 33; j++) { + double val = spfh->data_(j, indices[k]) / dist; + sum[j / 11] += val; + feature->data_(j, i) += val; + } + } + for (int j = 0; j < 3; j++) + if (sum[j] != 0.0) sum[j] = 100.0 / sum[j]; + for (int j = 0; j < 33; j++) { + feature->data_(j, i) *= sum[j / 11]; + // The commented line is the fpfh function in the + // paper. But according to PCL implementation, it is + // skipped. Our initial test shows that the full + // fpfh function in the paper seems to be better + // than PCL implementation. Further test required. + feature->data_(j, i) += spfh->data_(j, i); + } } } - for (int j = 0; j < 3; j++) - if (sum[j] != 0.0) sum[j] = 100.0 / sum[j]; - for (int j = 0; j < 33; j++) { - feature->data_(j, i) *= sum[j / 11]; - // The commented line is the fpfh function in the paper. - // But according to PCL implementation, it is skipped. - // Our initial test shows that the full fpfh function in the - // paper seems to be better than PCL implementation. Further - // test required. - feature->data_(j, i) += spfh->data_(j, i); - } - } - } - }); + }); return feature; } @@ -160,29 +165,35 @@ CorrespondenceSet CorrespondencesFromFeatures(const Feature &source_features, int(target_features.data_.cols())}; std::vector corres(num_searches); - tbb::parallel_for(tbb::blocked_range(0, num_searches, 1), - [&](const tbb::blocked_range& range) { - for (int k = range.begin(); k < range.end(); ++k) { - geometry::KDTreeFlann kdtree(features[1 - k]); - - int num_pts_k = num_pts[k]; - corres[k] = CorrespondenceSet(num_pts_k); - tbb::parallel_for(tbb::blocked_range( - 0, num_pts_k, utility::DefaultGrainSizeTBB()), - [&](const tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - std::vector corres_tmp(1); - std::vector dist_tmp(1); - - kdtree.SearchKNN( - Eigen::VectorXd(features[k].get().data_.col(i)), - 1, corres_tmp, dist_tmp); - int j = corres_tmp[0]; - corres[k][i] = Eigen::Vector2i(i, j); + tbb::parallel_for( + tbb::blocked_range(0, num_searches, 1), + [&](const tbb::blocked_range &range) { + for (int k = range.begin(); k < range.end(); ++k) { + geometry::KDTreeFlann kdtree(features[1 - k]); + + int num_pts_k = num_pts[k]; + corres[k] = CorrespondenceSet(num_pts_k); + tbb::parallel_for( + tbb::blocked_range( + 0, num_pts_k, + utility::DefaultGrainSizeTBB()), + [&](const tbb::blocked_range &range) { + for (int i = range.begin(); i < range.end(); + ++i) { + std::vector corres_tmp(1); + std::vector dist_tmp(1); + + kdtree.SearchKNN( + Eigen::VectorXd( + features[k].get().data_.col( + i)), + 1, corres_tmp, dist_tmp); + int j = corres_tmp[0]; + corres[k][i] = Eigen::Vector2i(i, j); + } + }); } }); - } - }); // corres[0]: corres_ij, corres[1]: corres_ji if (!mutual_filter) return corres[0]; diff --git a/cpp/open3d/pipelines/registration/GeneralizedICP.cpp b/cpp/open3d/pipelines/registration/GeneralizedICP.cpp index 99911fb0327..02039132f2e 100644 --- a/cpp/open3d/pipelines/registration/GeneralizedICP.cpp +++ b/cpp/open3d/pipelines/registration/GeneralizedICP.cpp @@ -12,9 +12,10 @@ #include "open3d/pipelines/registration/GeneralizedICP.h" +#include + #include #include -#include #include "open3d/geometry/KDTreeSearchParam.h" #include "open3d/geometry/PointCloud.h" @@ -66,14 +67,14 @@ std::shared_ptr InitializePointCloudForGeneralizedICP( output->covariances_.resize(output->points_.size()); const Eigen::Matrix3d C = Eigen::Vector3d(epsilon, 1, 1).asDiagonal(); - tbb::parallel_for(tbb::blocked_range( - 0, output->normals_.size(), 1), - [&](const tbb::blocked_range& range) { - for (std::size_t i = range.begin(); i < range.end(); ++i) { - const auto Rx = GetRotationFromE1ToX(output->normals_[i]); - output->covariances_[i] = Rx * C * Rx.transpose(); - } - }); + tbb::parallel_for( + tbb::blocked_range(0, output->normals_.size(), 1), + [&](const tbb::blocked_range &range) { + for (std::size_t i = range.begin(); i < range.end(); ++i) { + const auto Rx = GetRotationFromE1ToX(output->normals_[i]); + output->covariances_[i] = Rx * C * Rx.transpose(); + } + }); return output; } diff --git a/cpp/open3d/pipelines/registration/Registration.cpp b/cpp/open3d/pipelines/registration/Registration.cpp index a418ee9038a..a8e0a9b6858 100644 --- a/cpp/open3d/pipelines/registration/Registration.cpp +++ b/cpp/open3d/pipelines/registration/Registration.cpp @@ -33,21 +33,26 @@ struct RegistrationReduction { RegistrationReduction(const geometry::KDTreeFlann& target_kdtree_, const geometry::PointCloud& source_, double max_correspondence_distance_) - : target_kdtree(target_kdtree_), source(source_), + : target_kdtree(target_kdtree_), + source(source_), max_distance(max_correspondence_distance_), - correspondences(), error2(0.0) {} + correspondences(), + error2(0.0) {} RegistrationReduction(RegistrationReduction& other, tbb::split) - : target_kdtree(other.target_kdtree), source(other.source), - max_distance(other.max_distance), correspondences(), error2(0.0) {} + : target_kdtree(other.target_kdtree), + source(other.source), + max_distance(other.max_distance), + correspondences(), + error2(0.0) {} void operator()(const tbb::blocked_range& range) { for (std::size_t i = range.begin(); i < range.end(); ++i) { std::vector indices(1); std::vector dists(1); - const auto &point = source.points_[i]; - if (target_kdtree.SearchHybrid(point, - max_distance, 1, indices, dists) > 0) { + const auto& point = source.points_[i]; + if (target_kdtree.SearchHybrid(point, max_distance, 1, indices, + dists) > 0) { error2 += dists[0]; correspondences.emplace_back(i, indices[0]); } @@ -56,7 +61,8 @@ struct RegistrationReduction { void join(RegistrationReduction& other) { correspondences.insert(correspondences.end(), - other.correspondences.begin(), other.correspondences.end()); + other.correspondences.begin(), + other.correspondences.end()); error2 += other.error2; } @@ -66,24 +72,26 @@ struct RegistrationReduction { }; static RegistrationResult GetRegistrationResultAndCorrespondences( - const geometry::PointCloud &source, - const geometry::PointCloud &target, - const geometry::KDTreeFlann &target_kdtree, + const geometry::PointCloud& source, + const geometry::PointCloud& target, + const geometry::KDTreeFlann& target_kdtree, double max_correspondence_distance, - const Eigen::Matrix4d &transformation) { + const Eigen::Matrix4d& transformation) { RegistrationResult result(transformation); if (max_correspondence_distance <= 0.0) { return result; } - RegistrationReduction reducer(target_kdtree, - source, max_correspondence_distance); - tbb::parallel_reduce(tbb::blocked_range( - 0, source.points_.size(), utility::DefaultGrainSizeTBB()), reducer); + RegistrationReduction reducer(target_kdtree, source, + max_correspondence_distance); + tbb::parallel_reduce( + tbb::blocked_range(0, source.points_.size(), + utility::DefaultGrainSizeTBB()), + reducer); double error2; - std::tie(result.correspondence_set_, error2) - = std::move(reducer).as_tuple(); + std::tie(result.correspondence_set_, error2) = + std::move(reducer).as_tuple(); if (result.correspondence_set_.empty()) { result.fitness_ = 0.0; @@ -97,16 +105,16 @@ static RegistrationResult GetRegistrationResultAndCorrespondences( } static double EvaluateInlierCorrespondenceRatio( - const geometry::PointCloud &source, - const geometry::PointCloud &target, - const CorrespondenceSet &corres, + const geometry::PointCloud& source, + const geometry::PointCloud& target, + const CorrespondenceSet& corres, double max_correspondence_distance, - const Eigen::Matrix4d &transformation) { + const Eigen::Matrix4d& transformation) { RegistrationResult result(transformation); int inlier_corres = 0; double max_dis2 = max_correspondence_distance * max_correspondence_distance; - for (const auto &c : corres) { + for (const auto& c : corres) { double dis2 = (source.points_[c[0]] - target.points_[c[1]]).squaredNorm(); if (dis2 < max_dis2) { @@ -118,11 +126,11 @@ static double EvaluateInlierCorrespondenceRatio( } RegistrationResult EvaluateRegistration( - const geometry::PointCloud &source, - const geometry::PointCloud &target, + const geometry::PointCloud& source, + const geometry::PointCloud& target, double max_correspondence_distance, - const Eigen::Matrix4d - &transformation /* = Eigen::Matrix4d::Identity()*/) { + const Eigen::Matrix4d& + transformation /* = Eigen::Matrix4d::Identity()*/) { geometry::KDTreeFlann kdtree; kdtree.SetGeometry(target); geometry::PointCloud pcd = source; @@ -134,14 +142,14 @@ RegistrationResult EvaluateRegistration( } RegistrationResult RegistrationICP( - const geometry::PointCloud &source, - const geometry::PointCloud &target, + const geometry::PointCloud& source, + const geometry::PointCloud& target, double max_correspondence_distance, - const Eigen::Matrix4d &init /* = Eigen::Matrix4d::Identity()*/, - const TransformationEstimation &estimation + const Eigen::Matrix4d& init /* = Eigen::Matrix4d::Identity()*/, + const TransformationEstimation& estimation /* = TransformationEstimationPointToPoint(false)*/, - const ICPConvergenceCriteria - &criteria /* = ICPConvergenceCriteria()*/) { + const ICPConvergenceCriteria& + criteria /* = ICPConvergenceCriteria()*/) { if (max_correspondence_distance <= 0.0) { utility::LogError("Invalid max_correspondence_distance."); } @@ -198,7 +206,8 @@ RegistrationResult RegistrationICP( template void atomic_min(std::atomic& min_val, const T& val) noexcept { T prev_val = min_val; - while (prev_val > val && min_val.compare_exchange_weak(prev_val, val)); + while (prev_val > val && min_val.compare_exchange_weak(prev_val, val)) + ; } struct RANSACCorrespondenceReduction { @@ -207,8 +216,8 @@ struct RANSACCorrespondenceReduction { const geometry::PointCloud& target; const CorrespondenceSet& corres; const TransformationEstimation& estimation; - using CheckerType = std::vector>; + using CheckerType = + std::vector>; const CheckerType& checkers; const geometry::KDTreeFlann& kdtree; std::atomic& est_k_global; @@ -223,7 +232,6 @@ struct RANSACCorrespondenceReduction { CorrespondenceSet ransac_corres; RegistrationResult best_result; - RANSACCorrespondenceReduction(const geometry::PointCloud& source_, const geometry::PointCloud& target_, const CorrespondenceSet& corres_, @@ -236,18 +244,33 @@ struct RANSACCorrespondenceReduction { double max_dist_, int ransac_n_, double confidence) - : source(source_), target(target_), corres(corres_), - estimation(estimation_), checkers(checkers_), kdtree(kdtree_), - est_k_global(est_k_global_), total_validation(total_validation_), - rand_gen(rand_gen_), max_distance(max_dist_), ransac_n(ransac_n_), - log_confidence(std::log(1.0 - confidence)), ransac_corres(ransac_n) {} - - RANSACCorrespondenceReduction(RANSACCorrespondenceReduction& o, - tbb::split): source(o.source), target(o.target), corres(o.corres), - estimation(o.estimation), checkers(o.checkers), kdtree(o.kdtree), - est_k_global(o.est_k_global), total_validation(o.total_validation), - rand_gen(o.rand_gen), max_distance(o.max_distance), - ransac_n(o.ransac_n), log_confidence(o.log_confidence) {} + : source(source_), + target(target_), + corres(corres_), + estimation(estimation_), + checkers(checkers_), + kdtree(kdtree_), + est_k_global(est_k_global_), + total_validation(total_validation_), + rand_gen(rand_gen_), + max_distance(max_dist_), + ransac_n(ransac_n_), + log_confidence(std::log(1.0 - confidence)), + ransac_corres(ransac_n) {} + + RANSACCorrespondenceReduction(RANSACCorrespondenceReduction& o, tbb::split) + : source(o.source), + target(o.target), + corres(o.corres), + estimation(o.estimation), + checkers(o.checkers), + kdtree(o.kdtree), + est_k_global(o.est_k_global), + total_validation(o.total_validation), + rand_gen(o.rand_gen), + max_distance(o.max_distance), + ransac_n(o.ransac_n), + log_confidence(o.log_confidence) {} void operator()(const tbb::blocked_range& range) { int est_k_local = est_k_global; @@ -263,8 +286,11 @@ struct RANSACCorrespondenceReduction { // Check transformation: inexpensive if (!std::all_of(checkers.begin(), checkers.end(), - [&](const auto& checker){ return checker.get().Check( - source, target, ransac_corres, transformation); })) { + [&](const auto& checker) { + return checker.get().Check(source, target, + ransac_corres, + transformation); + })) { continue; } @@ -313,16 +339,16 @@ struct RANSACCorrespondenceReduction { }; RegistrationResult RegistrationRANSACBasedOnCorrespondence( - const geometry::PointCloud &source, - const geometry::PointCloud &target, - const CorrespondenceSet &corres, + const geometry::PointCloud& source, + const geometry::PointCloud& target, + const CorrespondenceSet& corres, double max_correspondence_distance, - const TransformationEstimation &estimation + const TransformationEstimation& estimation /* = TransformationEstimationPointToPoint(false)*/, int ransac_n /* = 3*/, - const std::vector> - &checkers /* = {}*/, - const RANSACConvergenceCriteria &criteria + const std::vector>& + checkers /* = {}*/, + const RANSACConvergenceCriteria& criteria /* = RANSACConvergenceCriteria()*/) { if (ransac_n < 3 || (int)corres.size() < ransac_n || max_correspondence_distance <= 0.0) { @@ -333,11 +359,14 @@ RegistrationResult RegistrationRANSACBasedOnCorrespondence( std::atomic est_k_global = criteria.max_iteration_; std::atomic total_validation = 0; utility::random::UniformIntGenerator rand_gen(0, corres.size() - 1); - RANSACCorrespondenceReduction reducer(source, target, corres, estimation, - checkers, kdtree, est_k_global, total_validation, rand_gen, - max_correspondence_distance, ransac_n, criteria.confidence_); - tbb::parallel_reduce(tbb::blocked_range( - 0, criteria.max_iteration_, utility::DefaultGrainSizeTBB()), reducer); + RANSACCorrespondenceReduction reducer( + source, target, corres, estimation, checkers, kdtree, est_k_global, + total_validation, rand_gen, max_correspondence_distance, ransac_n, + criteria.confidence_); + tbb::parallel_reduce( + tbb::blocked_range(0, criteria.max_iteration_, + utility::DefaultGrainSizeTBB()), + reducer); auto best_result = std::move(reducer.best_result); utility::LogDebug( "RANSAC exits after {:d} validations. Best inlier ratio {:e}, " @@ -347,19 +376,19 @@ RegistrationResult RegistrationRANSACBasedOnCorrespondence( } RegistrationResult RegistrationRANSACBasedOnFeatureMatching( - const geometry::PointCloud &source, - const geometry::PointCloud &target, - const Feature &source_features, - const Feature &target_features, + const geometry::PointCloud& source, + const geometry::PointCloud& target, + const Feature& source_features, + const Feature& target_features, bool mutual_filter, double max_correspondence_distance, - const TransformationEstimation - &estimation /* = TransformationEstimationPointToPoint(false)*/, + const TransformationEstimation& + estimation /* = TransformationEstimationPointToPoint(false)*/, int ransac_n /* = 3*/, - const std::vector> - &checkers /* = {}*/, - const RANSACConvergenceCriteria - &criteria /* = RANSACConvergenceCriteria()*/) { + const std::vector>& + checkers /* = {}*/, + const RANSACConvergenceCriteria& + criteria /* = RANSACConvergenceCriteria()*/) { if (ransac_n < 3 || max_correspondence_distance <= 0.0) { return RegistrationResult(); } @@ -414,16 +443,14 @@ struct InformationMatrixReducer { } } - void join(InformationMatrixReducer& other) { - GTG += other.GTG; - } + void join(InformationMatrixReducer& other) { GTG += other.GTG; } }; Eigen::Matrix6d GetInformationMatrixFromPointClouds( - const geometry::PointCloud &source, - const geometry::PointCloud &target, + const geometry::PointCloud& source, + const geometry::PointCloud& target, double max_correspondence_distance, - const Eigen::Matrix4d &transformation) { + const Eigen::Matrix4d& transformation) { geometry::PointCloud pcd = source; if (!transformation.isIdentity()) { pcd.Transform(transformation); @@ -435,11 +462,10 @@ Eigen::Matrix6d GetInformationMatrixFromPointClouds( transformation); InformationMatrixReducer reducer(result.correspondence_set_, target); - tbb::parallel_reduce( - tbb::blocked_range(0, - result.correspondence_set_.size(), - utility::DefaultGrainSizeTBB()), - reducer); + tbb::parallel_reduce(tbb::blocked_range( + 0, result.correspondence_set_.size(), + utility::DefaultGrainSizeTBB()), + reducer); return std::move(reducer.GTG); } diff --git a/cpp/open3d/pipelines/registration/Registration.h b/cpp/open3d/pipelines/registration/Registration.h index 6eeabfdf1c7..36a9e0274c0 100644 --- a/cpp/open3d/pipelines/registration/Registration.h +++ b/cpp/open3d/pipelines/registration/Registration.h @@ -109,10 +109,10 @@ class RegistrationResult { inlier_rmse_ < other.inlier_rmse_); } - RegistrationResult(const RegistrationResult&) = default; - RegistrationResult(RegistrationResult&&) = default; - RegistrationResult& operator=(const RegistrationResult&) = default; - RegistrationResult& operator=(RegistrationResult&&) = default; + RegistrationResult(const RegistrationResult &) = default; + RegistrationResult(RegistrationResult &&) = default; + RegistrationResult &operator=(const RegistrationResult &) = default; + RegistrationResult &operator=(RegistrationResult &&) = default; public: /// The estimated transformation matrix. diff --git a/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp b/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp index 9fea392807d..c648fc7e496 100644 --- a/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp +++ b/cpp/open3d/t/geometry/kernel/PointCloudCPU.cpp @@ -5,10 +5,10 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include "open3d/t/geometry/kernel/PointCloudImpl.h" - -#include #include +#include + +#include "open3d/t/geometry/kernel/PointCloudImpl.h" namespace open3d { namespace t { @@ -42,43 +42,49 @@ void ProjectCPU( } tbb::spin_mutex mtx; - core::ParallelFor(core::Device("CPU:0"), points.GetLength(), + core::ParallelFor( + core::Device("CPU:0"), points.GetLength(), [&](int64_t workload_idx) { - float x = points_ptr[3 * workload_idx + 0]; - float y = points_ptr[3 * workload_idx + 1]; - float z = points_ptr[3 * workload_idx + 2]; + float x = points_ptr[3 * workload_idx + 0]; + float y = points_ptr[3 * workload_idx + 1]; + float z = points_ptr[3 * workload_idx + 2]; - // coordinate in camera (in voxel -> in meter) - float xc, yc, zc, u, v; - transform_indexer.RigidTransform(x, y, z, &xc, &yc, &zc); + // coordinate in camera (in voxel -> in meter) + float xc, yc, zc, u, v; + transform_indexer.RigidTransform(x, y, z, &xc, &yc, &zc); - // coordinate in image (in pixel) - transform_indexer.Project(xc, yc, zc, &u, &v); - u = round(u); - v = round(v); - if (!depth_indexer.InBoundary(u, v) || zc <= 0 || zc > depth_max) { - return; - } + // coordinate in image (in pixel) + transform_indexer.Project(xc, yc, zc, &u, &v); + u = round(u); + v = round(v); + if (!depth_indexer.InBoundary(u, v) || zc <= 0 || + zc > depth_max) { + return; + } - float* depth_ptr = depth_indexer.GetDataPtr( - static_cast(u), static_cast(v)); - float d = zc * depth_scale; - { - tbb::spin_mutex::scoped_lock lock(mtx); - if (*depth_ptr == 0 || *depth_ptr >= d) { - *depth_ptr = d; + float* depth_ptr = depth_indexer.GetDataPtr( + static_cast(u), static_cast(v)); + float d = zc * depth_scale; + { + tbb::spin_mutex::scoped_lock lock(mtx); + if (*depth_ptr == 0 || *depth_ptr >= d) { + *depth_ptr = d; - if (has_colors) { - float* color_ptr = color_indexer.GetDataPtr( - static_cast(u), static_cast(v)); + if (has_colors) { + float* color_ptr = color_indexer.GetDataPtr( + static_cast(u), + static_cast(v)); - color_ptr[0] = point_colors_ptr[3 * workload_idx + 0]; - color_ptr[1] = point_colors_ptr[3 * workload_idx + 1]; - color_ptr[2] = point_colors_ptr[3 * workload_idx + 2]; + color_ptr[0] = + point_colors_ptr[3 * workload_idx + 0]; + color_ptr[1] = + point_colors_ptr[3 * workload_idx + 1]; + color_ptr[2] = + point_colors_ptr[3 * workload_idx + 2]; + } + } } - } - } - }); + }); } } // namespace pointcloud diff --git a/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp b/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp index e135caea14d..0c916233938 100644 --- a/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp +++ b/cpp/open3d/t/io/sensor/RGBDVideoReader.cpp @@ -7,10 +7,10 @@ #include "open3d/t/io/sensor/RGBDVideoReader.h" -#include - #include +#include + #include "open3d/io/IJsonConvertibleIO.h" #include "open3d/io/ImageIO.h" #include "open3d/t/io/sensor/realsense/RSBagReader.h" @@ -55,19 +55,21 @@ void RGBDVideoReader::SaveFrames(const std::string &frame_path, open3d::geometry::Image im_color, im_depth; for (auto tim_rgbd = NextFrame(); !IsEOF() && GetTimestamp() < end_time; ++idx, tim_rgbd = NextFrame()) - tbb::parallel_invoke([&](){ - im_color = tim_rgbd.color_.ToLegacy(); - auto color_file = - fmt::format("{0}/color/{1:05d}.jpg", frame_path, idx); - open3d::io::WriteImage(color_file, im_color); - utility::LogDebug("Written color image to {}", color_file); - }, [&](){ - im_depth = tim_rgbd.depth_.ToLegacy(); - auto depth_file = - fmt::format("{0}/depth/{1:05d}.png", frame_path, idx); - open3d::io::WriteImage(depth_file, im_depth); - utility::LogDebug("Written depth image to {}", depth_file); - }); + tbb::parallel_invoke( + [&]() { + im_color = tim_rgbd.color_.ToLegacy(); + auto color_file = fmt::format("{0}/color/{1:05d}.jpg", + frame_path, idx); + open3d::io::WriteImage(color_file, im_color); + utility::LogDebug("Written color image to {}", color_file); + }, + [&]() { + im_depth = tim_rgbd.depth_.ToLegacy(); + auto depth_file = fmt::format("{0}/depth/{1:05d}.png", + frame_path, idx); + open3d::io::WriteImage(depth_file, im_depth); + utility::LogDebug("Written depth image to {}", depth_file); + }); utility::LogInfo("Written {} depth and color images to {}/{{depth,color}}/", idx, frame_path); diff --git a/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h b/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h index 23f3fced488..4d572a4fc41 100644 --- a/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h +++ b/cpp/open3d/t/pipelines/kernel/FillInLinearSystemImpl.h @@ -56,15 +56,15 @@ void FillInRigidAlignmentTermCPU #if !defined(__CUDACC__) tbb::spin_mutex fill_alignment_mutex; tbb::profiling::set_name(fill_alignment_mutex, - "FillInRigidAlignmentTermCPU"); + "FillInRigidAlignmentTermCPU"); #define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex #else #defined LOCAL_LAMBDA_CAPTURE = #endif core::ParallelFor( - AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] - OPEN3D_DEVICE(int64_t workload_idx) { + AtA.GetDevice(), n, + [LOCAL_LAMBDA_CAPTURE] OPEN3D_DEVICE(int64_t workload_idx) { const float *p_prime = Ti_ps_ptr + 3 * workload_idx; const float *q_prime = Tj_qs_ptr + 3 * workload_idx; const float *normal_p_prime = @@ -100,19 +100,19 @@ void FillInRigidAlignmentTermCPU } atomicAdd(residual_ptr, r * r); #else - { - tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); - for (int i_local = 0; i_local < 12; ++i_local) { - for (int j_local = 0; j_local < 12; ++j_local) { - AtA_local_ptr[i_local * 12 + j_local] - += J_ij[i_local] * J_ij[j_local]; - } - Atb_local_ptr[i_local] += J_ij[i_local] * r; - } - *residual_ptr += r * r; - } + { + tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); + for (int i_local = 0; i_local < 12; ++i_local) { + for (int j_local = 0; j_local < 12; ++j_local) { + AtA_local_ptr[i_local * 12 + j_local] += + J_ij[i_local] * J_ij[j_local]; + } + Atb_local_ptr[i_local] += J_ij[i_local] * r; + } + *residual_ptr += r * r; + } #endif - }); + }); #undef LOCAL_LAMBDA_CAPTURE // Then fill-in the large linear system @@ -200,15 +200,15 @@ void FillInSLACAlignmentTermCPU #if !defined(__CUDACC__) tbb::spin_mutex fill_alignment_mutex; tbb::profiling::set_name(fill_alignment_mutex, - "FillInSLACAlignmentTermCPU"); + "FillInSLACAlignmentTermCPU"); #define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex #else #defined LOCAL_LAMBDA_CAPTURE = #endif core::ParallelFor( - AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] - OPEN3D_DEVICE(int64_t workload_idx) { + AtA.GetDevice(), n, + [LOCAL_LAMBDA_CAPTURE] OPEN3D_DEVICE(int64_t workload_idx) { const float *Ti_Cp = Ti_Cps_ptr + 3 * workload_idx; const float *Tj_Cq = Tj_Cqs_ptr + 3 * workload_idx; const float *Cnormal_p = Cnormal_ps_ptr + 3 * workload_idx; @@ -284,19 +284,19 @@ void FillInSLACAlignmentTermCPU } atomicAdd(residual_ptr, r * r); #else - { - tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); - for (int ki = 0; ki < 60; ++ki) { - for (int kj = 0; kj < 60; ++kj) { - AtA_ptr[idx[ki] * n_vars + idx[kj]] - += J[ki] * J[kj]; - } - Atb_ptr[idx[ki]] += J[ki] * r; - } - *residual_ptr += r * r; - } + { + tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); + for (int ki = 0; ki < 60; ++ki) { + for (int kj = 0; kj < 60; ++kj) { + AtA_ptr[idx[ki] * n_vars + idx[kj]] += + J[ki] * J[kj]; + } + Atb_ptr[idx[ki]] += J[ki] * r; + } + *residual_ptr += r * r; + } #endif - }); + }); #undef LOCAL_LAMBDA_CAPTURE } @@ -338,14 +338,15 @@ void FillInSLACRegularizerTermCPU #if !defined(__CUDACC__) tbb::spin_mutex fill_alignment_mutex; tbb::profiling::set_name(fill_alignment_mutex, - "FillInSLACRegularizerTermCPU"); + "FillInSLACRegularizerTermCPU"); #define LOCAL_LAMBDA_CAPTURE =, &fill_alignment_mutex #else #defined LOCAL_LAMBDA_CAPTURE = #endif - core::ParallelFor(AtA.GetDevice(), n, [LOCAL_LAMBDA_CAPTURE] - OPEN3D_DEVICE(int64_t workload_idx) { + core::ParallelFor( + AtA.GetDevice(), n, + [LOCAL_LAMBDA_CAPTURE] OPEN3D_DEVICE(int64_t workload_idx) { // Enumerate 6 neighbors int idx_i = grid_idx_ptr[workload_idx]; @@ -477,34 +478,37 @@ void FillInSLACRegularizerTermCPU -weight * local_r[axis]); } #else - { - tbb::spin_mutex::scoped_lock lock(fill_alignment_mutex); - // Update residual - *residual_ptr += weight * (local_r[0] * local_r[0] + - local_r[1] * local_r[1] + - local_r[2] * local_r[2]); - - for (int axis = 0; axis < 3; ++axis) { - // Update AtA: 2x2 - AtA_ptr[(offset_idx_i + axis) * n_vars + - offset_idx_i + axis] += weight; - AtA_ptr[(offset_idx_k + axis) * n_vars + - offset_idx_k + axis] += weight; - - AtA_ptr[(offset_idx_i + axis) * n_vars + - offset_idx_k + axis] -= weight; - AtA_ptr[(offset_idx_k + axis) * n_vars + - offset_idx_i + axis] -= weight; - - // Update Atb: 2x1 - Atb_ptr[offset_idx_i + axis] += weight * local_r[axis]; - Atb_ptr[offset_idx_k + axis] -= weight * local_r[axis]; - } - } + { + tbb::spin_mutex::scoped_lock lock( + fill_alignment_mutex); + // Update residual + *residual_ptr += weight * (local_r[0] * local_r[0] + + local_r[1] * local_r[1] + + local_r[2] * local_r[2]); + + for (int axis = 0; axis < 3; ++axis) { + // Update AtA: 2x2 + AtA_ptr[(offset_idx_i + axis) * n_vars + + offset_idx_i + axis] += weight; + AtA_ptr[(offset_idx_k + axis) * n_vars + + offset_idx_k + axis] += weight; + + AtA_ptr[(offset_idx_i + axis) * n_vars + + offset_idx_k + axis] -= weight; + AtA_ptr[(offset_idx_k + axis) * n_vars + + offset_idx_i + axis] -= weight; + + // Update Atb: 2x1 + Atb_ptr[offset_idx_i + axis] += + weight * local_r[axis]; + Atb_ptr[offset_idx_k + axis] -= + weight * local_r[axis]; + } + } #endif } } - }); + }); #undef LOCAL_LAMBDA_CAPTURE } } // namespace kernel diff --git a/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp b/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp index 8ce0634653d..27a098d84dc 100644 --- a/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp +++ b/cpp/open3d/t/pipelines/kernel/RGBDOdometryCPU.cpp @@ -49,40 +49,40 @@ void ComputeOdometryInformationMatrixCPU(const core::Tensor& source_vertex_map, A_1x21 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_21, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_x[6], J_y[6], J_z[6]; - float rx, ry, rz; - - bool valid = GetJacobianPointToPoint( - x, y, square_dist_thr, source_vertex_indexer, - target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry, - rz); - - if (valid) { - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_x[j] * J_x[k]; - A_reduction[i] += J_y[j] * J_y[k]; - A_reduction[i] += J_z[j] * J_z[k]; - i++; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_x[6], J_y[6], J_z[6]; + float rx, ry, rz; + + bool valid = GetJacobianPointToPoint( + x, y, square_dist_thr, source_vertex_indexer, + target_vertex_indexer, ti, J_x, J_y, J_z, rx, ry, + rz); + + if (valid) { + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_x[j] * J_x[k]; + A_reduction[i] += J_y[j] * J_y[k]; + A_reduction[i] += J_z[j] * J_z[k]; + i++; + } + } } } - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(21); - for (int j = 0; j < 21; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(21); + for (int j = 0; j < 21; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x21, {21}, core::Float32, device); float* reduction_ptr = A_reduction_tensor.GetDataPtr(); @@ -141,47 +141,47 @@ void ComputeOdometryResultIntensityCPU( A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_I[6]; - float r_I; - - bool valid = GetJacobianIntensity( - x, y, depth_outlier_trunc, source_depth_indexer, - target_depth_indexer, source_intensity_indexer, - target_intensity_indexer, - target_intensity_dx_indexer, - target_intensity_dy_indexer, source_vertex_indexer, - ti, J_I, r_I); - - if (valid) { - float d_huber = HuberDeriv(r_I, intensity_huber_delta); - float r_huber = HuberLoss(r_I, intensity_huber_delta); - - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_I[j] * J_I[k]; - i++; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_I[6]; + float r_I; + + bool valid = GetJacobianIntensity( + x, y, depth_outlier_trunc, source_depth_indexer, + target_depth_indexer, source_intensity_indexer, + target_intensity_indexer, + target_intensity_dx_indexer, + target_intensity_dy_indexer, source_vertex_indexer, + ti, J_I, r_I); + + if (valid) { + float d_huber = HuberDeriv(r_I, intensity_huber_delta); + float r_huber = HuberLoss(r_I, intensity_huber_delta); + + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_I[j] * J_I[k]; + i++; + } + A_reduction[21 + j] += J_I[j] * d_huber; + } + A_reduction[27] += r_huber; + A_reduction[28] += 1; } - A_reduction[21 + j] += J_I[j] * d_huber; } - A_reduction[27] += r_huber; - A_reduction[28] += 1; - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); @@ -234,54 +234,54 @@ void ComputeOdometryResultHybridCPU(const core::Tensor& source_depth, A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_I[6], J_D[6]; - float r_I, r_D; - - bool valid = GetJacobianHybrid( - x, y, depth_outlier_trunc, source_depth_indexer, - target_depth_indexer, source_intensity_indexer, - target_intensity_indexer, target_depth_dx_indexer, - target_depth_dy_indexer, - target_intensity_dx_indexer, - target_intensity_dy_indexer, source_vertex_indexer, - ti, J_I, J_D, r_I, r_D); - - if (valid) { - float d_huber_I = - HuberDeriv(r_I, intensity_huber_delta); - float d_huber_D = HuberDeriv(r_D, depth_huber_delta); - - float r_huber_I = HuberLoss(r_I, intensity_huber_delta); - float r_huber_D = HuberLoss(r_D, depth_huber_delta); - - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += - J_I[j] * J_I[k] + J_D[j] * J_D[k]; - i++; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_I[6], J_D[6]; + float r_I, r_D; + + bool valid = GetJacobianHybrid( + x, y, depth_outlier_trunc, source_depth_indexer, + target_depth_indexer, source_intensity_indexer, + target_intensity_indexer, target_depth_dx_indexer, + target_depth_dy_indexer, + target_intensity_dx_indexer, + target_intensity_dy_indexer, source_vertex_indexer, + ti, J_I, J_D, r_I, r_D); + + if (valid) { + float d_huber_I = + HuberDeriv(r_I, intensity_huber_delta); + float d_huber_D = HuberDeriv(r_D, depth_huber_delta); + + float r_huber_I = HuberLoss(r_I, intensity_huber_delta); + float r_huber_D = HuberLoss(r_D, depth_huber_delta); + + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += + J_I[j] * J_I[k] + J_D[j] * J_D[k]; + i++; + } + A_reduction[21 + j] += + J_I[j] * d_huber_I + J_D[j] * d_huber_D; + } + A_reduction[27] += r_huber_I + r_huber_D; + A_reduction[28] += 1; } - A_reduction[21 + j] += - J_I[j] * d_huber_I + J_D[j] * d_huber_D; } - A_reduction[27] += r_huber_I + r_huber_D; - A_reduction[28] += 1; - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); } @@ -318,43 +318,43 @@ void ComputeOdometryResultPointToPlaneCPU( A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - workload_idx++) { - int y = workload_idx / cols; - int x = workload_idx % cols; - - float J_ij[6]; - float r; - - bool valid = GetJacobianPointToPlane( - x, y, depth_outlier_trunc, source_vertex_indexer, - target_vertex_indexer, target_normal_indexer, ti, - J_ij, r); - - if (valid) { - float d_huber = HuberDeriv(r, depth_huber_delta); - float r_huber = HuberLoss(r, depth_huber_delta); - for (int i = 0, j = 0; j < 6; j++) { - for (int k = 0; k <= j; k++) { - A_reduction[i] += J_ij[j] * J_ij[k]; - i++; + for (int workload_idx = r.begin(); workload_idx < r.end(); + workload_idx++) { + int y = workload_idx / cols; + int x = workload_idx % cols; + + float J_ij[6]; + float r; + + bool valid = GetJacobianPointToPlane( + x, y, depth_outlier_trunc, source_vertex_indexer, + target_vertex_indexer, target_normal_indexer, ti, + J_ij, r); + + if (valid) { + float d_huber = HuberDeriv(r, depth_huber_delta); + float r_huber = HuberLoss(r, depth_huber_delta); + for (int i = 0, j = 0; j < 6; j++) { + for (int k = 0; k <= j; k++) { + A_reduction[i] += J_ij[j] * J_ij[k]; + i++; + } + A_reduction[21 + j] += J_ij[j] * d_huber; + } + A_reduction[27] += r_huber; + A_reduction[28] += 1; } - A_reduction[21 + j] += J_ij[j] * d_huber; } - A_reduction[27] += r_huber; - A_reduction[28] += 1; - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; j++) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; j++) { + result[j] = a[j] + b[j]; + } + return result; + }); core::Tensor A_reduction_tensor(A_1x29, {29}, core::Float32, device); DecodeAndSolve6x6(A_reduction_tensor, delta, inlier_residual, inlier_count); diff --git a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp index ba3b440c932..fe302d9cf35 100644 --- a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp +++ b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp @@ -44,44 +44,44 @@ static void ComputePosePointToPlaneKernelCPU( std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, - [&](const tbb::blocked_range& range, - std::vector A_reduction) { - for (int workload_idx = range.begin(); workload_idx < range.end(); - ++workload_idx) { - scalar_t J_ij[6]; - scalar_t r = 0; - - bool valid = kernel::GetJacobianPointToPlane( - workload_idx, source_points_ptr, target_points_ptr, - target_normals_ptr, correspondence_indices, J_ij, - r); - - scalar_t w = GetWeightFromRobustKernel(r); - - if (valid) { - // Dump J, r into JtJ and Jtr - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_ij[j] * w * J_ij[k]; - ++i; + [&](const tbb::blocked_range &range, + std::vector A_reduction) { + for (int workload_idx = range.begin(); + workload_idx < range.end(); ++workload_idx) { + scalar_t J_ij[6]; + scalar_t r = 0; + + bool valid = kernel::GetJacobianPointToPlane( + workload_idx, source_points_ptr, target_points_ptr, + target_normals_ptr, correspondence_indices, J_ij, + r); + + scalar_t w = GetWeightFromRobustKernel(r); + + if (valid) { + // Dump J, r into JtJ and Jtr + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_ij[j] * w * J_ij[k]; + ++i; + } + A_reduction[21 + j] += J_ij[j] * w * r; + } + A_reduction[27] += r; + A_reduction[28] += 1; } - A_reduction[21 + j] += J_ij[j] * w * r; } - A_reduction[27] += r; - A_reduction[28] += 1; - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 29; ++i) { global_sum[i] = A_1x29[i]; @@ -143,49 +143,49 @@ static void ComputePoseColoredICPKernelCPU( std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, - [&](const tbb::blocked_range& r, + [&](const tbb::blocked_range &r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - ++workload_idx) { - scalar_t J_G[6] = {0}, J_I[6] = {0}; - scalar_t r_G = 0, r_I = 0; - - bool valid = GetJacobianColoredICP( - workload_idx, source_points_ptr, source_colors_ptr, - target_points_ptr, target_normals_ptr, - target_colors_ptr, target_color_gradients_ptr, - correspondence_indices, sqrt_lambda_geometric, - sqrt_lambda_photometric, J_G, J_I, r_G, r_I); - - scalar_t w_G = GetWeightFromRobustKernel(r_G); - scalar_t w_I = GetWeightFromRobustKernel(r_I); - - if (valid) { - // Dump J, r into JtJ and Jtr - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_G[j] * w_G * J_G[k] + - J_I[j] * w_I * J_I[k]; - ++i; + for (int workload_idx = r.begin(); workload_idx < r.end(); + ++workload_idx) { + scalar_t J_G[6] = {0}, J_I[6] = {0}; + scalar_t r_G = 0, r_I = 0; + + bool valid = GetJacobianColoredICP( + workload_idx, source_points_ptr, source_colors_ptr, + target_points_ptr, target_normals_ptr, + target_colors_ptr, target_color_gradients_ptr, + correspondence_indices, sqrt_lambda_geometric, + sqrt_lambda_photometric, J_G, J_I, r_G, r_I); + + scalar_t w_G = GetWeightFromRobustKernel(r_G); + scalar_t w_I = GetWeightFromRobustKernel(r_I); + + if (valid) { + // Dump J, r into JtJ and Jtr + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_G[j] * w_G * J_G[k] + + J_I[j] * w_I * J_I[k]; + ++i; + } + A_reduction[21 + j] += + J_G[j] * w_G * r_G + J_I[j] * w_I * r_I; + } + A_reduction[27] += r_G * r_G + r_I * r_I; + A_reduction[28] += 1; } - A_reduction[21 + j] += - J_G[j] * w_G * r_G + J_I[j] * w_I * r_I; } - A_reduction[27] += r_G * r_G + r_I * r_I; - A_reduction[28] += 1; - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(29); - for (int j = 0; j < 29; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(29); + for (int j = 0; j < 29; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 29; ++i) { global_sum[i] = A_1x29[i]; @@ -575,36 +575,36 @@ void ComputeInformationMatrixKernelCPU(const scalar_t *target_points_ptr, AtA = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_21, [&](tbb::blocked_range r, std::vector A_reduction) { - for (int workload_idx = r.begin(); workload_idx < r.end(); - ++workload_idx) { - scalar_t J_x[6] = {0}, J_y[6] = {0}, J_z[6] = {0}; - - bool valid = GetInformationJacobians( - workload_idx, target_points_ptr, - correspondence_indices, J_x, J_y, J_z); - - if (valid) { - int i = 0; - for (int j = 0; j < 6; ++j) { - for (int k = 0; k <= j; ++k) { - A_reduction[i] += J_x[j] * J_x[k] + - J_y[j] * J_y[k] + - J_z[j] * J_z[k]; - ++i; + for (int workload_idx = r.begin(); workload_idx < r.end(); + ++workload_idx) { + scalar_t J_x[6] = {0}, J_y[6] = {0}, J_z[6] = {0}; + + bool valid = GetInformationJacobians( + workload_idx, target_points_ptr, + correspondence_indices, J_x, J_y, J_z); + + if (valid) { + int i = 0; + for (int j = 0; j < 6; ++j) { + for (int k = 0; k <= j; ++k) { + A_reduction[i] += J_x[j] * J_x[k] + + J_y[j] * J_y[k] + + J_z[j] * J_z[k]; + ++i; + } + } } } - } - } - return A_reduction; - }, - // TBB: Defining reduction operation. - [&](std::vector a, std::vector b) { - std::vector result(21); - for (int j = 0; j < 21; ++j) { - result[j] = a[j] + b[j]; - } - return result; - }); + return A_reduction; + }, + // TBB: Defining reduction operation. + [&](std::vector a, std::vector b) { + std::vector result(21); + for (int j = 0; j < 21; ++j) { + result[j] = a[j] + b[j]; + } + return result; + }); for (int i = 0; i < 21; ++i) { global_sum[i] = AtA[i]; diff --git a/cpp/open3d/t/pipelines/registration/Feature.cpp b/cpp/open3d/t/pipelines/registration/Feature.cpp index 56cd9a76e81..b2217dee0c9 100644 --- a/cpp/open3d/t/pipelines/registration/Feature.cpp +++ b/cpp/open3d/t/pipelines/registration/Feature.cpp @@ -104,16 +104,16 @@ core::Tensor CorrespondencesFromFeatures(const core::Tensor &source_features, // corres[0]: corres_ij, corres[1]: corres_ji tbb::parallel_for(tbb::blocked_range(0, num_searches, 1), - [&](const tbb::blocked_range& range){ - for (int i = range.begin(); i < range.end(); ++i) { - core::nns::NearestNeighborSearch nns( - features[1 - i], core::Dtype::Int64); - nns.KnnIndex(); - auto result = nns.KnnSearch(features[i], 1); - - corres[i] = result.first.View({-1}); - } - }); + [&](const tbb::blocked_range &range) { + for (int i = range.begin(); i < range.end(); ++i) { + core::nns::NearestNeighborSearch nns( + features[1 - i], core::Dtype::Int64); + nns.KnnIndex(); + auto result = nns.KnnSearch(features[i], 1); + + corres[i] = result.first.View({-1}); + } + }); auto corres_ij = corres[0]; core::Tensor arange_source = diff --git a/cpp/open3d/utility/Eigen.cpp b/cpp/open3d/utility/Eigen.cpp index 292834e0050..db10e65c45c 100644 --- a/cpp/open3d/utility/Eigen.cpp +++ b/cpp/open3d/utility/Eigen.cpp @@ -157,7 +157,7 @@ struct JTJandJTrReduceBodyHelper { VecType JTr = VecType::Zero(); double r2_sum = 0.0; - void join(const JTJandJTrReduceBodyHelper& rhs) { + void join(const JTJandJTrReduceBodyHelper &rhs) { JTJ += rhs.JTJ; JTr += rhs.JTr; r2_sum += rhs.r2_sum; @@ -173,24 +173,24 @@ struct JTJandJTrReduceBody; // Code specific to the single valued reduction template -struct JTJandJTrReduceBody: public - JTJandJTrReduceBodyHelper { - using FuncType = std::function; +struct JTJandJTrReduceBody + : public JTJandJTrReduceBodyHelper { + using FuncType = std::function; // Global data - FuncType& f; + FuncType &f; // Local data VecType J_r = VecType::Zero(); double r = 0.0; double w = 0.0; - JTJandJTrReduceBody(FuncType& f_) + JTJandJTrReduceBody(FuncType &f_) : JTJandJTrReduceBodyHelper(), f(f_) {} - JTJandJTrReduceBody(JTJandJTrReduceBody& other, tbb::split sp) + JTJandJTrReduceBody(JTJandJTrReduceBody &other, tbb::split sp) : JTJandJTrReduceBodyHelper(), f(other.f) {} - void operator()(const tbb::blocked_range& range) { + void operator()(const tbb::blocked_range &range) { for (int i = range.begin(); i < range.end(); ++i) { f(i, J_r, r, w); this->JTJ.noalias() += J_r * w * J_r.transpose(); @@ -198,7 +198,6 @@ struct JTJandJTrReduceBody: public this->r2_sum += r * r; } } - }; template @@ -207,8 +206,10 @@ std::tuple ComputeJTJandJTr( int iteration_num, bool verbose /*=true*/) { JTJandJTrReduceBody reducer(f); - tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, - utility::DefaultGrainSizeTBB()), reducer); + tbb::parallel_reduce( + tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), + reducer); std::tuple result = std::move(reducer).as_tuple(); if (verbose) { LogDebug("Residual : {:.2e} (# of elements : {:d})", @@ -219,28 +220,36 @@ std::tuple ComputeJTJandJTr( // Code specific to the vector valued reduction template -struct JTJandJTrReduceBody: public - JTJandJTrReduceBodyHelper { - using FuncType = std::function>&, - std::vector&, std::vector&)>; +struct JTJandJTrReduceBody + : public JTJandJTrReduceBodyHelper { + using FuncType = std::function> &, + std::vector &, + std::vector &)>; // Global data - FuncType& f; + FuncType &f; // Local data std::vector> J_r; std::vector r; std::vector w; - JTJandJTrReduceBody(FuncType& f_) + JTJandJTrReduceBody(FuncType &f_) : JTJandJTrReduceBodyHelper(), - f(f_), J_r(), r(), w() {} + f(f_), + J_r(), + r(), + w() {} - JTJandJTrReduceBody(JTJandJTrReduceBody& other, tbb::split sp) + JTJandJTrReduceBody(JTJandJTrReduceBody &other, tbb::split sp) : JTJandJTrReduceBodyHelper(), - f(other.f), J_r(), r(), w() {} + f(other.f), + J_r(), + r(), + w() {} - void operator()(const tbb::blocked_range& range) { + void operator()(const tbb::blocked_range &range) { for (int i = range.begin(); i < range.end(); ++i) { f(i, J_r, r, w); for (int j = 0; j < (int)r.size(); j++) { @@ -250,7 +259,6 @@ struct JTJandJTrReduceBody: public } } } - }; template @@ -263,8 +271,10 @@ std::tuple ComputeJTJandJTr( int iteration_num, bool verbose /*=true*/) { JTJandJTrReduceBody reducer(f); - tbb::parallel_reduce(tbb::blocked_range(0, iteration_num, - utility::DefaultGrainSizeTBB()), reducer); + tbb::parallel_reduce( + tbb::blocked_range(0, iteration_num, + utility::DefaultGrainSizeTBB()), + reducer); std::tuple result = std::move(reducer).as_tuple(); if (verbose) { LogDebug("Residual : {:.2e} (# of elements : {:d})", diff --git a/cpp/open3d/utility/Logging.h b/cpp/open3d/utility/Logging.h index fdb132f9b06..5dd4d830024 100644 --- a/cpp/open3d/utility/Logging.h +++ b/cpp/open3d/utility/Logging.h @@ -143,7 +143,7 @@ class Logger { int line, const char *function, const char *format, - Args &&... args) { + Args &&...args) { if (sizeof...(Args) > 0) { Logger::GetInstance().VError( file, line, function, @@ -158,7 +158,7 @@ class Logger { int line, const char *function, const char *format, - Args &&... args) { + Args &&...args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Warning) { if (sizeof...(Args) > 0) { @@ -176,7 +176,7 @@ class Logger { int line, const char *function, const char *format, - Args &&... args) { + Args &&...args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Info) { if (sizeof...(Args) > 0) { Logger::GetInstance().VInfo( @@ -193,7 +193,7 @@ class Logger { int line, const char *function, const char *format, - Args &&... args) { + Args &&...args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Debug) { if (sizeof...(Args) > 0) { diff --git a/cpp/open3d/utility/Parallel.cpp b/cpp/open3d/utility/Parallel.cpp index 313f80a1756..d4ca3d0eed6 100644 --- a/cpp/open3d/utility/Parallel.cpp +++ b/cpp/open3d/utility/Parallel.cpp @@ -9,13 +9,10 @@ #include - namespace open3d { namespace utility { -int EstimateMaxThreads() { - return tbb::this_task_arena::max_concurrency(); -} +int EstimateMaxThreads() { return tbb::this_task_arena::max_concurrency(); } std::size_t& DefaultGrainSizeTBB() noexcept { static std::size_t GrainSize = 256; diff --git a/cpp/open3d/utility/ProgressBar.cpp b/cpp/open3d/utility/ProgressBar.cpp index 7a1efae3a9e..2dea083b385 100644 --- a/cpp/open3d/utility/ProgressBar.cpp +++ b/cpp/open3d/utility/ProgressBar.cpp @@ -5,11 +5,12 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include "open3d/utility/Logging.h" #include "open3d/utility/ProgressBar.h" #include +#include "open3d/utility/Logging.h" + #ifdef _OPENMP #include #endif @@ -100,8 +101,8 @@ TBBProgressBar::TBBProgressBar(std::size_t expected_count, } void TBBProgressBar::Reset(std::size_t expected_count, - std::string progress_info, - bool active) noexcept(false) { + std::string progress_info, + bool active) noexcept(false) { if (expected_count & flag_bit_mask) { utility::LogError("Expected count out of range [0, 2^31)"); } @@ -138,13 +139,13 @@ void TBBProgressBar::UpdateBar() { // If so set the flag bit and print 100% // tmp is created so that compare_exchange doesn't modify expected_count if (std::size_t tmp = expected_count_; - current_count_.compare_exchange_strong( - tmp, expected_count_ | flag_bit_mask)) { + current_count_.compare_exchange_strong( + tmp, expected_count_ | flag_bit_mask)) { fmt::print("{}[{}] 100%\n", progress_info_, std::string(resolution_, '=')); } else if (tbb::this_task_arena::current_thread_index() != 0) { - std::size_t new_progress_pixel = current_count_ * - resolution_ / expected_count_; + std::size_t new_progress_pixel = + current_count_ * resolution_ / expected_count_; if (new_progress_pixel > progress_pixel_) { progress_pixel_ = new_progress_pixel; int percent = int(current_count_ * 100 / expected_count_); diff --git a/cpp/open3d/utility/ProgressBar.h b/cpp/open3d/utility/ProgressBar.h index cc68a4f2c6b..9aef731eef3 100644 --- a/cpp/open3d/utility/ProgressBar.h +++ b/cpp/open3d/utility/ProgressBar.h @@ -20,9 +20,7 @@ class ProgressBar { ProgressBar(size_t expected_count, std::string progress_info, bool active = false); - void Reset(size_t expected_count, - std::string progress_info, - bool active); + void Reset(size_t expected_count, std::string progress_info, bool active); inline ProgressBar& operator++() { return *this += 1; }; virtual ProgressBar& operator+=(std::size_t n); void SetCurrentCount(size_t n); @@ -63,7 +61,6 @@ class TBBProgressBar { return current_count_ & ~flag_bit_mask; } - protected: static constexpr std::size_t flag_bit_mask = ~(~std::size_t{} >> 1); static constexpr std::size_t resolution_ = 40; diff --git a/cpp/open3d/utility/Random.h b/cpp/open3d/utility/Random.h index 6b38a2fd02d..c7a5393d7e5 100644 --- a/cpp/open3d/utility/Random.h +++ b/cpp/open3d/utility/Random.h @@ -8,6 +8,7 @@ #pragma once #include + #include #include "open3d/utility/Logging.h" diff --git a/cpp/tests/utility/ProgressBar.cpp b/cpp/tests/utility/ProgressBar.cpp index da08565c1b9..b8e175e433c 100644 --- a/cpp/tests/utility/ProgressBar.cpp +++ b/cpp/tests/utility/ProgressBar.cpp @@ -7,11 +7,11 @@ #include "open3d/utility/ProgressBar.h" +#include + #include #include -#include - #include "open3d/utility/Parallel.h" #include "tests/Tests.h" @@ -34,9 +34,8 @@ TEST(ProgressBar, OMPProgressBar) { utility::OMPProgressBar progress_bar(iterations, "OMPProgressBar test: ", true); -#pragma omp parallel for schedule(static) default(none) \ - shared(iterations, progress_bar) \ - num_threads(utility::EstimateMaxThreads()) +#pragma omp parallel for schedule(static) default(none) shared( \ + iterations, progress_bar) num_threads(utility::EstimateMaxThreads()) for (int i = 0; i < iterations; ++i) { std::this_thread::sleep_for(std::chrono::milliseconds(10)); ++progress_bar; @@ -46,29 +45,31 @@ TEST(ProgressBar, OMPProgressBar) { TEST(ProgressBar, TBBProgressBar) { int iterations = 1000; - utility::TBBProgressBar progress_bar( - iterations, "TBBProgressBar test: ", true); - tbb::parallel_for(tbb::blocked_range(0, iterations, 10), + utility::TBBProgressBar progress_bar(iterations, + "TBBProgressBar test: ", true); + tbb::parallel_for( + tbb::blocked_range(0, iterations, 10), [&](const tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - ++progress_bar; - } - }); + for (int i = range.begin(); i < range.end(); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + ++progress_bar; + } + }); EXPECT_EQ(static_cast(progress_bar.GetCurrentCount()), iterations); } TEST(ProgressBar, TBBProgressBarBatched) { int iterations = 1000; - utility::TBBProgressBar progress_bar( - iterations, "Batched TBBProgressBar test: ", true); - tbb::parallel_for(tbb::blocked_range(0, iterations, 10), + utility::TBBProgressBar progress_bar(iterations, + "Batched TBBProgressBar test: ", true); + tbb::parallel_for( + tbb::blocked_range(0, iterations, 10), [&](const tbb::blocked_range& range) { - for (int i = range.begin(); i < range.end(); ++i) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - } - progress_bar += (range.end() - range.begin()); - }); + for (int i = range.begin(); i < range.end(); ++i) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + progress_bar += (range.end() - range.begin()); + }); EXPECT_EQ(static_cast(progress_bar.GetCurrentCount()), iterations); } diff --git a/examples/cpp/RealSenseBagReader.cpp b/examples/cpp/RealSenseBagReader.cpp index 8c58801b0da..7a7a58d8ba8 100644 --- a/examples/cpp/RealSenseBagReader.cpp +++ b/examples/cpp/RealSenseBagReader.cpp @@ -6,6 +6,7 @@ // ---------------------------------------------------------------------------- #include +#include #include #include @@ -14,8 +15,6 @@ #include #include -#include - #include "open3d/Open3D.h" using namespace open3d; @@ -206,17 +205,19 @@ int main(int argc, char *argv[]) { ++idx; if (write_image) { - tbb::parallel_invoke([&](){ - auto color_file = fmt::format("{0}/color/{1:05d}.jpg", - output_path, idx); - utility::LogInfo("Writing to {}", color_file); - io::WriteImage(color_file, im_rgbd.color_); - }, [&](){ - auto depth_file = fmt::format("{0}/depth/{1:05d}.png", - output_path, idx); - utility::LogInfo("Writing to {}", depth_file); - io::WriteImage(depth_file, im_rgbd.depth_); - }); + tbb::parallel_invoke( + [&]() { + auto color_file = fmt::format( + "{0}/color/{1:05d}.jpg", output_path, idx); + utility::LogInfo("Writing to {}", color_file); + io::WriteImage(color_file, im_rgbd.color_); + }, + [&]() { + auto depth_file = fmt::format( + "{0}/depth/{1:05d}.png", output_path, idx); + utility::LogInfo("Writing to {}", depth_file); + io::WriteImage(depth_file, im_rgbd.depth_); + }); } vis.UpdateGeometry(); vis.UpdateRender(); diff --git a/examples/cpp/TrimMeshBasedOnPointCloud.cpp b/examples/cpp/TrimMeshBasedOnPointCloud.cpp index ef45c51b801..311e31fb3dd 100644 --- a/examples/cpp/TrimMeshBasedOnPointCloud.cpp +++ b/examples/cpp/TrimMeshBasedOnPointCloud.cpp @@ -5,10 +5,10 @@ // SPDX-License-Identifier: MIT // ---------------------------------------------------------------------------- -#include "open3d/Open3D.h" - #include +#include "open3d/Open3D.h" + void PrintHelp() { using namespace open3d; From 6399001b9a95cd68413253d11cb3059d8a4dab60 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Mon, 12 Feb 2024 12:51:00 -0800 Subject: [PATCH 10/22] Marked single argument constructors for reductions as explicit to please codacy --- cpp/open3d/utility/Eigen.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/utility/Eigen.cpp b/cpp/open3d/utility/Eigen.cpp index db10e65c45c..6ffefddcd74 100644 --- a/cpp/open3d/utility/Eigen.cpp +++ b/cpp/open3d/utility/Eigen.cpp @@ -184,7 +184,7 @@ struct JTJandJTrReduceBody double r = 0.0; double w = 0.0; - JTJandJTrReduceBody(FuncType &f_) + explicit JTJandJTrReduceBody(FuncType &f_) : JTJandJTrReduceBodyHelper(), f(f_) {} JTJandJTrReduceBody(JTJandJTrReduceBody &other, tbb::split sp) @@ -235,7 +235,7 @@ struct JTJandJTrReduceBody std::vector r; std::vector w; - JTJandJTrReduceBody(FuncType &f_) + explicit JTJandJTrReduceBody(FuncType &f_) : JTJandJTrReduceBodyHelper(), f(f_), J_r(), From 11c546d4f3d9f9445e4a9cee8a693b2ce3e78cc6 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Mon, 12 Feb 2024 12:52:12 -0800 Subject: [PATCH 11/22] Explicitly load atomics in calls to utility::Log*(...) because newer versions of format wont automatically convert it to its underlying type. --- cpp/open3d/geometry/PointCloudSegmentation.cpp | 2 +- cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp | 4 ++-- cpp/open3d/pipelines/color_map/RigidOptimizer.cpp | 4 ++-- cpp/open3d/pipelines/registration/Registration.cpp | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/open3d/geometry/PointCloudSegmentation.cpp b/cpp/open3d/geometry/PointCloudSegmentation.cpp index 74c63236dd5..c36cc4dafb4 100644 --- a/cpp/open3d/geometry/PointCloudSegmentation.cpp +++ b/cpp/open3d/geometry/PointCloudSegmentation.cpp @@ -268,7 +268,7 @@ std::tuple> PointCloud::SegmentPlane( "RANSAC | Inliers: {:d}, Fitness: {:e}, RMSE: {:e}, Iteration: " "{:d}", final_inliers.size(), result.fitness_, result.inlier_rmse_, - iteration_count); + iteration_count.load()); return std::make_tuple(best_plane_model, final_inliers); } diff --git a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp index 7a0861bbce7..96f35fe9061 100644 --- a/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/NonRigidOptimizer.cpp @@ -400,8 +400,8 @@ RunNonRigidOptimizer(const geometry::TriangleMesh& mesh, atomic_sum(residual_reg, rr_reg); } }); - utility::LogDebug("Residual error : {:.6f}, reg : {:.6f}", residual, - residual_reg); + utility::LogDebug("Residual error : {:.6f}, reg : {:.6f}", + residual.load(), residual_reg.load()); SetProxyIntensityForVertex(opt_mesh, images_gray, warping_fields, opt_camera_trajectory, visibility_vertex_to_image, proxy_intensity, diff --git a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp index 91636d39e50..fffe0ea42f6 100644 --- a/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp +++ b/cpp/open3d/pipelines/color_map/RigidOptimizer.cpp @@ -204,9 +204,9 @@ RunRigidOptimizer(const geometry::TriangleMesh& mesh, }); if (total_num_ > 0) { utility::LogDebug("Residual error : {:.6f} (avg : {:.6f})", - residual, residual / total_num_); + residual.load(), residual / total_num_); } else { - utility::LogDebug("Residual error : {:.6f}", residual); + utility::LogDebug("Residual error : {:.6f}", residual.load()); } SetProxyIntensityForVertex(opt_mesh, images_gray, utility::nullopt, opt_camera_trajectory, diff --git a/cpp/open3d/pipelines/registration/Registration.cpp b/cpp/open3d/pipelines/registration/Registration.cpp index a8e0a9b6858..d9ec1533f42 100644 --- a/cpp/open3d/pipelines/registration/Registration.cpp +++ b/cpp/open3d/pipelines/registration/Registration.cpp @@ -370,8 +370,8 @@ RegistrationResult RegistrationRANSACBasedOnCorrespondence( auto best_result = std::move(reducer.best_result); utility::LogDebug( "RANSAC exits after {:d} validations. Best inlier ratio {:e}, " - "RMSE {:e}", - total_validation, best_result.fitness_, best_result.inlier_rmse_); + "RMSE {:e}", total_validation.load(), best_result.fitness_, + best_result.inlier_rmse_); return best_result; } From f48abcca147a656db164bdbf6a3578804a781577 Mon Sep 17 00:00:00 2001 From: Sameer Sheorey Date: Wed, 17 Apr 2024 13:00:29 -0700 Subject: [PATCH 12/22] style fix --- cpp/open3d/pipelines/registration/Registration.cpp | 3 ++- cpp/open3d/utility/Logging.h | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/open3d/pipelines/registration/Registration.cpp b/cpp/open3d/pipelines/registration/Registration.cpp index d9ec1533f42..ed8fa6f3317 100644 --- a/cpp/open3d/pipelines/registration/Registration.cpp +++ b/cpp/open3d/pipelines/registration/Registration.cpp @@ -370,7 +370,8 @@ RegistrationResult RegistrationRANSACBasedOnCorrespondence( auto best_result = std::move(reducer.best_result); utility::LogDebug( "RANSAC exits after {:d} validations. Best inlier ratio {:e}, " - "RMSE {:e}", total_validation.load(), best_result.fitness_, + "RMSE {:e}", + total_validation.load(), best_result.fitness_, best_result.inlier_rmse_); return best_result; } diff --git a/cpp/open3d/utility/Logging.h b/cpp/open3d/utility/Logging.h index 5dd4d830024..fdb132f9b06 100644 --- a/cpp/open3d/utility/Logging.h +++ b/cpp/open3d/utility/Logging.h @@ -143,7 +143,7 @@ class Logger { int line, const char *function, const char *format, - Args &&...args) { + Args &&... args) { if (sizeof...(Args) > 0) { Logger::GetInstance().VError( file, line, function, @@ -158,7 +158,7 @@ class Logger { int line, const char *function, const char *format, - Args &&...args) { + Args &&... args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Warning) { if (sizeof...(Args) > 0) { @@ -176,7 +176,7 @@ class Logger { int line, const char *function, const char *format, - Args &&...args) { + Args &&... args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Info) { if (sizeof...(Args) > 0) { Logger::GetInstance().VInfo( @@ -193,7 +193,7 @@ class Logger { int line, const char *function, const char *format, - Args &&...args) { + Args &&... args) { if (Logger::GetInstance().GetVerbosityLevel() >= VerbosityLevel::Debug) { if (sizeof...(Args) > 0) { From c04077f9fff6387bedd8b744c21f69ac1d84181c Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Tue, 10 Sep 2024 18:48:17 -0700 Subject: [PATCH 13/22] Fixed incorrect include in Parallel.h --- cpp/open3d/utility/Parallel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/open3d/utility/Parallel.h b/cpp/open3d/utility/Parallel.h index 61452ac85c3..33b59fc3cd0 100644 --- a/cpp/open3d/utility/Parallel.h +++ b/cpp/open3d/utility/Parallel.h @@ -7,7 +7,7 @@ #pragma once -#include +#include namespace open3d { namespace utility { From 7d6c7073fc41cb7b4b9da133a6c830e7647142f7 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Tue, 10 Sep 2024 18:48:58 -0700 Subject: [PATCH 14/22] Updated ProgressBar.h to use size_t in std namespace for consistency --- cpp/open3d/utility/ProgressBar.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/utility/ProgressBar.h b/cpp/open3d/utility/ProgressBar.h index 9aef731eef3..7d3999e0706 100644 --- a/cpp/open3d/utility/ProgressBar.h +++ b/cpp/open3d/utility/ProgressBar.h @@ -17,10 +17,10 @@ namespace utility { class ProgressBar { public: - ProgressBar(size_t expected_count, + ProgressBar(std::size_t expected_count, std::string progress_info, bool active = false); - void Reset(size_t expected_count, std::string progress_info, bool active); + void Reset(std::size_t expected_count, std::string progress_info, bool active); inline ProgressBar& operator++() { return *this += 1; }; virtual ProgressBar& operator+=(std::size_t n); void SetCurrentCount(size_t n); From bba01efd45a165c04dc487e6c6fc83c680ae2931 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Tue, 10 Sep 2024 18:49:33 -0700 Subject: [PATCH 15/22] Switched std::lock_guard for DiscreteGenerator to use tbb::spin_mutex::scoped_lock. --- cpp/open3d/utility/Random.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/utility/Random.h b/cpp/open3d/utility/Random.h index c7a5393d7e5..9445c03666b 100644 --- a/cpp/open3d/utility/Random.h +++ b/cpp/open3d/utility/Random.h @@ -210,8 +210,8 @@ class DiscreteGenerator { /// Call this to generate a discretely distributed integer value. T operator()() { - std::lock_guard lock(*GetMutex()); - return distribution_(*GetEngine()); + tbb::spin_mutex::scoped_lock lock(GetMutex()); + return distribution_(GetEngine()); } protected: From ec98caebf7598011b51b012ff8bc8f94d9c16869 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Tue, 10 Sep 2024 18:49:57 -0700 Subject: [PATCH 16/22] Switch to using tbb::parallel_reduce on all platforms. --- cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp index fe302d9cf35..7fee90c998f 100644 --- a/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp +++ b/cpp/open3d/t/pipelines/kernel/RegistrationCPU.cpp @@ -262,18 +262,12 @@ static void ComputePoseDopplerICPKernelCPU( // and 28th as inlier_count. std::vector A_1x29(29, 0.0); -#ifdef _WIN32 std::vector zeros_29(29, 0.0); A_1x29 = tbb::parallel_reduce( tbb::blocked_range(0, n), zeros_29, [&](tbb::blocked_range r, std::vector A_reduction) { for (int workload_idx = r.begin(); workload_idx < r.end(); ++workload_idx) { -#else - scalar_t *A_reduction = A_1x29.data(); -#pragma omp parallel for reduction(+ : A_reduction[:29]) schedule(static) num_threads(utility::EstimateMaxThreads()) - for (int workload_idx = 0; workload_idx < n; ++workload_idx) { -#endif scalar_t J_G[6] = {0}, J_D[6] = {0}; scalar_t r_G = 0, r_D = 0; @@ -306,7 +300,6 @@ static void ComputePoseDopplerICPKernelCPU( A_reduction[28] += 1; } } -#ifdef _WIN32 return A_reduction; }, // TBB: Defining reduction operation. @@ -317,7 +310,6 @@ static void ComputePoseDopplerICPKernelCPU( } return result; }); -#endif for (int i = 0; i < 29; ++i) { global_sum[i] = A_1x29[i]; From dbe4133f34b1d3eba90bf20150bbc202824608c8 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Thu, 19 Sep 2024 19:39:21 -0700 Subject: [PATCH 17/22] Fixed progress bar bug that allow for multiple threads writing to the terminal. --- cpp/open3d/utility/ProgressBar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/open3d/utility/ProgressBar.cpp b/cpp/open3d/utility/ProgressBar.cpp index 2dea083b385..45c022fb841 100644 --- a/cpp/open3d/utility/ProgressBar.cpp +++ b/cpp/open3d/utility/ProgressBar.cpp @@ -143,7 +143,7 @@ void TBBProgressBar::UpdateBar() { tmp, expected_count_ | flag_bit_mask)) { fmt::print("{}[{}] 100%\n", progress_info_, std::string(resolution_, '=')); - } else if (tbb::this_task_arena::current_thread_index() != 0) { + } else if (tbb::this_task_arena::current_thread_index() == 0) { std::size_t new_progress_pixel = current_count_ * resolution_ / expected_count_; if (new_progress_pixel > progress_pixel_) { From 25d8e7feed33616a72efa6b169025321c635b227 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Thu, 19 Sep 2024 19:40:23 -0700 Subject: [PATCH 18/22] Fixed issue with CPU reduction. Need to exit early if no work is to be done to prevent assignment to the output pointer. --- cpp/open3d/core/kernel/ReductionCPU.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/open3d/core/kernel/ReductionCPU.cpp b/cpp/open3d/core/kernel/ReductionCPU.cpp index bdde3acfec6..7eb256cd739 100644 --- a/cpp/open3d/core/kernel/ReductionCPU.cpp +++ b/cpp/open3d/core/kernel/ReductionCPU.cpp @@ -112,9 +112,13 @@ class CPUReductionEngine { "Internal error: two-pass reduction only works for " "single-output reduction ops."); } + const auto num_workloads = indexer.NumWorkloads(); + if (num_workloads == 0) { + return; + } scalar_t& dst = *reinterpret_cast(indexer.GetOutputPtr(0)); dst = tbb::parallel_reduce( - tbb::blocked_range(0, indexer.NumWorkloads(), + tbb::blocked_range(0, num_workloads, utility::DefaultGrainSizeTBB()), identity, [&](const tbb::blocked_range& range, scalar_t so_far) { From 929f0104229682637d970ceab20f24e08c53db87 Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Thu, 19 Sep 2024 20:08:32 -0700 Subject: [PATCH 19/22] Updated MemoryManagerStatistic to use a tbb::spin_mutex instead of a std::mutex --- cpp/open3d/core/MemoryManagerStatistic.cpp | 6 +++--- cpp/open3d/core/MemoryManagerStatistic.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/open3d/core/MemoryManagerStatistic.cpp b/cpp/open3d/core/MemoryManagerStatistic.cpp index 82c15e2edf9..978172a7e0a 100644 --- a/cpp/open3d/core/MemoryManagerStatistic.cpp +++ b/cpp/open3d/core/MemoryManagerStatistic.cpp @@ -118,7 +118,7 @@ bool MemoryManagerStatistic::HasLeaks() const { void MemoryManagerStatistic::CountMalloc(void* ptr, size_t byte_size, const Device& device) { - std::lock_guard lock(statistics_mutex_); + tbb::spin_mutex::scoped_lock lock(statistics_mutex_); // Filter nullptr. Empty allocations are not tracked. if (ptr == nullptr && byte_size == 0) { @@ -141,7 +141,7 @@ void MemoryManagerStatistic::CountMalloc(void* ptr, } void MemoryManagerStatistic::CountFree(void* ptr, const Device& device) { - std::lock_guard lock(statistics_mutex_); + tbb::spin_mutex::scoped_lock lock(statistics_mutex_); // Filter nullptr. Empty allocations are not tracked. if (ptr == nullptr) { @@ -170,7 +170,7 @@ void MemoryManagerStatistic::CountFree(void* ptr, const Device& device) { } void MemoryManagerStatistic::Reset() { - std::lock_guard lock(statistics_mutex_); + tbb::spin_mutex::scoped_lock lock(statistics_mutex_); statistics_.clear(); } diff --git a/cpp/open3d/core/MemoryManagerStatistic.h b/cpp/open3d/core/MemoryManagerStatistic.h index 60de026ec5c..d27049e6e2c 100644 --- a/cpp/open3d/core/MemoryManagerStatistic.h +++ b/cpp/open3d/core/MemoryManagerStatistic.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "open3d/core/Device.h" @@ -86,7 +87,7 @@ class MemoryManagerStatistic { /// Print at each malloc and free, disabled by default. bool print_at_malloc_free_ = false; - std::mutex statistics_mutex_; + tbb::spin_mutex statistics_mutex_; std::map statistics_; }; From 31484e5088832f8fc396cc1d11ee9ab3d995b56f Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Thu, 19 Sep 2024 20:17:43 -0700 Subject: [PATCH 20/22] Updated point cloud segmentation to generate random samples using the global mutex from utilities::random. --- .../geometry/PointCloudSegmentation.cpp | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/cpp/open3d/geometry/PointCloudSegmentation.cpp b/cpp/open3d/geometry/PointCloudSegmentation.cpp index c36cc4dafb4..b079dd57323 100644 --- a/cpp/open3d/geometry/PointCloudSegmentation.cpp +++ b/cpp/open3d/geometry/PointCloudSegmentation.cpp @@ -6,6 +6,7 @@ // ---------------------------------------------------------------------------- #include +#include #include #include @@ -29,30 +30,28 @@ namespace geometry { template class RandomSampler { public: - explicit RandomSampler(const size_t total_size) : total_size_(total_size) {} + explicit RandomSampler(const std::size_t total_size) + : dist(0, total_size - 1) {} - std::vector operator()(size_t sample_size) { - std::lock_guard lock(mutex_); - std::vector samples; - samples.reserve(sample_size); + std::vector operator()(std::size_t sample_size) { + std::vector indices; + indices.reserve(sample_size); - size_t valid_sample = 0; - while (valid_sample < sample_size) { - const size_t idx = utility::random::RandUint32() % total_size_; + tbb::spin_mutex::scoped_lock lock(utility::random::GetMutex()); + while (indices.size() < sample_size) { + const T idx = dist(utility::random::GetEngine()); // Well, this is slow. But typically the sample_size is small. - if (std::find(samples.begin(), samples.end(), idx) == - samples.end()) { - samples.push_back(idx); - valid_sample++; + if (std::find(indices.begin(), indices.end(), idx) == + indices.end()) { + indices.emplace_back(idx); } } - return samples; + return indices; } private: - size_t total_size_; - std::mutex mutex_; + std::uniform_int_distribution dist; }; /// \class RANSACResult @@ -198,9 +197,7 @@ std::tuple> PointCloud::SegmentPlane( continue; } - const std::vector sampled_indices = - sampler(ransac_n); - std::vector inliers = sampled_indices; + std::vector inliers = sampler(ransac_n); // Fit model to num_model_parameters randomly selected // points among the inliers. From 81b9bb3d9178d56edfaefd8d7dd59b6e45a82bfa Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Mon, 23 Sep 2024 11:36:09 -0700 Subject: [PATCH 21/22] Updated example usage of global mutex and engine access to reflect the TBB types. --- cpp/open3d/utility/Random.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/open3d/utility/Random.h b/cpp/open3d/utility/Random.h index 9445c03666b..1ea135a7e50 100644 --- a/cpp/open3d/utility/Random.h +++ b/cpp/open3d/utility/Random.h @@ -29,8 +29,8 @@ void Seed(const int seed); /// /// { /// // Put the lock and the call to the engine in the same scope. -/// std::lock_guard lock(*utility::random::GetMutex()); -/// std::shuffle(vals.begin(), vals.end(), *utility::random::GetEngine()); +/// tbb::spin_mutex::scoped_lock lock(utility::random::GetMutex()); +/// std::shuffle(vals.begin(), vals.end(), utility::random::GetEngine()); /// } /// ``` std::mt19937& GetEngine(); From 86a474d5c7c327c604e0b705979fa61545aa392c Mon Sep 17 00:00:00 2001 From: Daniel Simon Date: Wed, 2 Oct 2024 13:01:47 -0700 Subject: [PATCH 22/22] Turn TBB into publicly linked library --- 3rdparty/find_dependencies.cmake | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/3rdparty/find_dependencies.cmake b/3rdparty/find_dependencies.cmake index 77b7085df69..25bef81996e 100644 --- a/3rdparty/find_dependencies.cmake +++ b/3rdparty/find_dependencies.cmake @@ -1568,7 +1568,7 @@ if(OPEN3D_USE_ONEAPI_PACKAGES) PACKAGE TBB TARGETS TBB::tbb ) - list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_SYSTEM Open3D::3rdparty_tbb) + list(APPEND Open3D_3RDPARTY_PUBLIC_TARGETS_FROM_SYSTEM Open3D::3rdparty_tbb) else(OPEN3D_USE_ONEAPI_PACKAGES) # MKL/BLAS @@ -1687,7 +1687,7 @@ else(OPEN3D_USE_ONEAPI_PACKAGES) INCLUDE_DIRS ${STATIC_MKL_INCLUDE_DIR} LIB_DIR ${STATIC_MKL_LIB_DIR} LIBRARIES ${STATIC_MKL_LIBRARIES} - DEPENDS ext_tbb ext_mkl_include ext_mkl + DEPENDS Open3D::3rdparty_tbb ext_mkl_include ext_mkl ) if(UNIX) target_compile_options(3rdparty_blas INTERFACE "$<$:-m64>") @@ -1709,9 +1709,13 @@ else(OPEN3D_USE_ONEAPI_PACKAGES) endif() if(NOT USE_SYSTEM_TBB) include(${Open3D_3RDPARTY_DIR}/mkl/tbb.cmake) - list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_CUSTOM TBB::tbb) + add_library(3rdparty_tbb INTERFACE) + add_library(Open3D::3rdparty_tbb ALIAS 3rdparty_tbb) + target_link_libraries(3rdparty_tbb INTERFACE TBB::tbb) + install(TARGETS 3rdparty_tbb EXPORT ${PROJECT_NAME}Targets) + list(APPEND Open3D_3RDPARTY_PUBLIC_TARGETS_FROM_CUSTOM Open3D::3rdparty_tbb) else() - list(APPEND Open3D_3RDPARTY_PRIVATE_TARGETS_FROM_SYSTEM Open3D::3rdparty_tbb) + list(APPEND Open3D_3RDPARTY_PUBLIC_TARGETS_FROM_SYSTEM Open3D::3rdparty_tbb) endif() endif(OPEN3D_USE_ONEAPI_PACKAGES)