diff --git a/include/LightGBM/cuda/cuda_tree.hpp b/include/LightGBM/cuda/cuda_tree.hpp index 7ab06190481b..f3b92f52b385 100644 --- a/include/LightGBM/cuda/cuda_tree.hpp +++ b/include/LightGBM/cuda/cuda_tree.hpp @@ -97,7 +97,15 @@ class CUDATree : public Tree { const double* cuda_leaf_value() const { return cuda_leaf_value_; } - double* cuda_leaf_value_ref() { return cuda_leaf_value_; } + double* cuda_leaf_value_ref() const { return cuda_leaf_value_; } + + int host_leaf_depth(int leaf_index) const { /* host-side depth of leaf `leaf_index`, or -1 when the index is out of range; read-only, hence const */ + if (leaf_index >= 0 && leaf_index < num_leaves_ && leaf_index < static_cast<int>(host_leaf_depth_.size())) { /* size check: the depth vector is only sized by the max_leaves constructor in this patch */ + return host_leaf_depth_[leaf_index]; + } else { + return -1; + } + } inline void Shrinkage(double rate) override; @@ -161,6 +169,8 @@ class CUDATree : public Tree { CUDAVector cuda_cat_boundaries_; CUDAVector cuda_cat_boundaries_inner_; + std::vector<int> host_leaf_depth_; /* per-leaf depth kept on the host; -1 for leaves that do not exist yet */ + cudaStream_t cuda_stream_; const int num_threads_per_block_add_prediction_to_score_; diff --git a/src/io/cuda/cuda_tree.cpp b/src/io/cuda/cuda_tree.cpp index c5dee89ca3af..571bc828f434 100644 --- a/src/io/cuda/cuda_tree.cpp +++ b/src/io/cuda/cuda_tree.cpp @@ -24,6 +24,8 @@ num_threads_per_block_add_prediction_to_score_(1024) { cuda_cat_boundaries_inner_.Resize(max_leaves); } InitCUDAMemory(); + host_leaf_depth_.resize(max_leaves_, -1); /* -1 marks leaf slots that have not been created */ + host_leaf_depth_[0] = 1; /* the root leaf starts at depth 1 */ } CUDATree::CUDATree(const Tree* host_tree): @@ -217,6 +219,10 @@ int CUDATree::Split(const int leaf_index, const CUDASplitInfo* cuda_split_info) { LaunchSplitKernel(leaf_index, real_feature_index, real_threshold, missing_type, cuda_split_info); RecordBranchFeatures(leaf_index, num_leaves_, real_feature_index); + + ++host_leaf_depth_[leaf_index]; /* the split leaf keeps its index and moves one level deeper */ + host_leaf_depth_[num_leaves_] = host_leaf_depth_[leaf_index]; /* the new leaf takes index num_leaves_ and the same depth */ + ++num_leaves_; return num_leaves_ - 1; } diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp index e272ce744b1a..52ad9d8dca69 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.cpp +++ 
b/src/treelearner/cuda/cuda_best_split_finder.cpp @@ -27,6 +27,7 @@ CUDABestSplitFinder::CUDABestSplitFinder( min_data_in_leaf_(config->min_data_in_leaf), min_sum_hessian_in_leaf_(config->min_sum_hessian_in_leaf), min_gain_to_split_(config->min_gain_to_split), + max_depth_(config->max_depth), cat_smooth_(config->cat_smooth), cat_l2_(config->cat_l2), max_cat_threshold_(config->max_cat_threshold), @@ -330,14 +331,18 @@ void CUDABestSplitFinder::FindBestSplitsForLeaf( const data_size_t num_data_in_larger_leaf, const double sum_hessians_in_smaller_leaf, const double sum_hessians_in_larger_leaf, + const int smaller_leaf_depth, /* host-side depth of the smaller leaf */ + const int larger_leaf_depth, /* host-side depth of the larger leaf; callers pass CUDATree::host_leaf_depth(), which yields -1 for an out-of-range index */ const score_t* grad_scale, const score_t* hess_scale, const uint8_t smaller_num_bits_in_histogram_bins, const uint8_t larger_num_bits_in_histogram_bins) { const bool is_smaller_leaf_valid = (num_data_in_smaller_leaf > min_data_in_leaf_ && - sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_); + sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_ && + (max_depth_ <= 0 || (smaller_leaf_depth > 0 && smaller_leaf_depth < max_depth_))); /* a non-positive max_depth_ disables the depth limit; NOTE(review): depth is 1-based here (root = 1), confirm `<` rather than `<=` matches the CPU learner */ const bool is_larger_leaf_valid = (num_data_in_larger_leaf > min_data_in_leaf_ && - sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0); + sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0 && + (max_depth_ <= 0 || (larger_leaf_depth > 0 && larger_leaf_depth < max_depth_))); /* same depth rule as for the smaller leaf */ if (grad_scale != nullptr && hess_scale != nullptr) { LaunchFindBestSplitsDiscretizedForLeafKernel(smaller_leaf_splits, larger_leaf_splits, smaller_leaf_index, larger_leaf_index, is_smaller_leaf_valid, is_larger_leaf_valid, diff --git a/src/treelearner/cuda/cuda_best_split_finder.hpp b/src/treelearner/cuda/cuda_best_split_finder.hpp index 2d9940312533..09a70ddff05d 100644 --- a/src/treelearner/cuda/cuda_best_split_finder.hpp +++ b/src/treelearner/cuda/cuda_best_split_finder.hpp @@ -182,6 +182,7 @@ class CUDABestSplitFinder { data_size_t min_data_in_leaf_; double 
min_sum_hessian_in_leaf_; double min_gain_to_split_; + int max_depth_; /* depth limit, copied from config->max_depth in the constructor */ double cat_smooth_; double cat_l2_; int max_cat_threshold_; diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp index 952ef52f8023..0f7d7910e2fe 100644 --- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp +++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp @@ -227,6 +227,8 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients, smaller_leaf_index_, larger_leaf_index_, num_data_in_smaller_leaf, num_data_in_larger_leaf, sum_hessians_in_smaller_leaf, sum_hessians_in_larger_leaf, + tree->host_leaf_depth(smaller_leaf_index_), /* host-side depth of the smaller leaf */ + tree->host_leaf_depth(larger_leaf_index_), /* host_leaf_depth() returns -1 when the index is out of range */ cuda_gradient_discretizer_->grad_scale_ptr(), cuda_gradient_discretizer_->hess_scale_ptr(), smaller_leaf_num_bits_bin, @@ -238,6 +240,8 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients, smaller_leaf_index_, larger_leaf_index_, num_data_in_smaller_leaf, num_data_in_larger_leaf, sum_hessians_in_smaller_leaf, sum_hessians_in_larger_leaf, + tree->host_leaf_depth(smaller_leaf_index_), /* same depth arguments as the discretized-gradient call */ + tree->host_leaf_depth(larger_leaf_index_), nullptr, nullptr, 0, 0); }