diff --git a/include/LightGBM/cuda/cuda_tree.hpp b/include/LightGBM/cuda/cuda_tree.hpp
index 7ab06190481b..f3b92f52b385 100644
--- a/include/LightGBM/cuda/cuda_tree.hpp
+++ b/include/LightGBM/cuda/cuda_tree.hpp
@@ -97,7 +97,15 @@ class CUDATree : public Tree {
 
   const double* cuda_leaf_value() const { return cuda_leaf_value_; }
 
-  double* cuda_leaf_value_ref() { return cuda_leaf_value_; }
+  double* cuda_leaf_value_ref() const { return cuda_leaf_value_; }  // non-const pointer, callable on a const tree
+
+  // Depth of `leaf_index` as tracked on the host (root starts at 1), or -1 when the index
+  // is not a current leaf or no depth has been recorded for it.
+  int host_leaf_depth(int leaf_index) const {
+    const bool known = leaf_index >= 0 && leaf_index < num_leaves_ &&
+      static_cast<size_t>(leaf_index) < host_leaf_depth_.size();
+    return known ? host_leaf_depth_[leaf_index] : -1;
+  }
 
   inline void Shrinkage(double rate) override;
 
@@ -161,6 +169,8 @@ class CUDATree : public Tree {
   CUDAVector<int> cuda_cat_boundaries_;
   CUDAVector<int> cuda_cat_boundaries_inner_;
 
+  std::vector<int> host_leaf_depth_;  // depth per leaf index, maintained on the host; -1 for unused slots
+
   cudaStream_t cuda_stream_;
 
   const int num_threads_per_block_add_prediction_to_score_;
diff --git a/src/io/cuda/cuda_tree.cpp b/src/io/cuda/cuda_tree.cpp
index c5dee89ca3af..571bc828f434 100644
--- a/src/io/cuda/cuda_tree.cpp
+++ b/src/io/cuda/cuda_tree.cpp
@@ -24,6 +24,8 @@ num_threads_per_block_add_prediction_to_score_(1024) {
     cuda_cat_boundaries_inner_.Resize(max_leaves);
   }
   InitCUDAMemory();
+  host_leaf_depth_.resize(max_leaves_, -1);  // -1 marks slots with no leaf yet
+  host_leaf_depth_[0] = 1;  // depths are 1-based: the root leaf starts at 1
 }
 
 CUDATree::CUDATree(const Tree* host_tree):
@@ -217,6 +219,10 @@ int CUDATree::Split(const int leaf_index,
            const CUDASplitInfo* cuda_split_info) {
   LaunchSplitKernel(leaf_index, real_feature_index, real_threshold, missing_type, cuda_split_info);
   RecordBranchFeatures(leaf_index, num_leaves_, real_feature_index);
+  // The split leaf and the newly created leaf (index num_leaves_) both end up one level
+  // below the depth the split leaf had before this call.
+  const int child_depth = host_leaf_depth_[leaf_index] + 1;
+  host_leaf_depth_[num_leaves_] = host_leaf_depth_[leaf_index] = child_depth;
   ++num_leaves_;
   return num_leaves_ - 1;
 }
diff --git a/src/treelearner/cuda/cuda_best_split_finder.cpp b/src/treelearner/cuda/cuda_best_split_finder.cpp
index e272ce744b1a..52ad9d8dca69 100644
--- a/src/treelearner/cuda/cuda_best_split_finder.cpp
+++ b/src/treelearner/cuda/cuda_best_split_finder.cpp
@@ -27,6 +27,7 @@ CUDABestSplitFinder::CUDABestSplitFinder(
   min_data_in_leaf_(config->min_data_in_leaf),
   min_sum_hessian_in_leaf_(config->min_sum_hessian_in_leaf),
   min_gain_to_split_(config->min_gain_to_split),
+  max_depth_(config->max_depth),
   cat_smooth_(config->cat_smooth),
   cat_l2_(config->cat_l2),
   max_cat_threshold_(config->max_cat_threshold),
@@ -330,14 +331,21 @@ void CUDABestSplitFinder::FindBestSplitsForLeaf(
   const data_size_t num_data_in_larger_leaf,
   const double sum_hessians_in_smaller_leaf,
   const double sum_hessians_in_larger_leaf,
+  const int smaller_leaf_depth,
+  const int larger_leaf_depth,
   const score_t* grad_scale,
   const score_t* hess_scale,
   const uint8_t smaller_num_bits_in_histogram_bins,
   const uint8_t larger_num_bits_in_histogram_bins) {
+  // Depth gate: max_depth_ <= 0 means no limit. Leaf depths passed in are 1-based (CUDATree
+  // starts the root at 1), so a leaf at depth == max_depth_ may still be split once more.
+  // A non-positive depth means the caller had no depth for that leaf; treat it as not splittable.
   const bool is_smaller_leaf_valid = (num_data_in_smaller_leaf > min_data_in_leaf_ &&
-    sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_);
+    sum_hessians_in_smaller_leaf > min_sum_hessian_in_leaf_ &&
+    (max_depth_ <= 0 || (smaller_leaf_depth > 0 && smaller_leaf_depth <= max_depth_)));
   const bool is_larger_leaf_valid = (num_data_in_larger_leaf > min_data_in_leaf_ &&
-    sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0);
+    sum_hessians_in_larger_leaf > min_sum_hessian_in_leaf_ && larger_leaf_index >= 0 &&
+    (max_depth_ <= 0 || (larger_leaf_depth > 0 && larger_leaf_depth <= max_depth_)));
   if (grad_scale != nullptr && hess_scale != nullptr) {
     LaunchFindBestSplitsDiscretizedForLeafKernel(smaller_leaf_splits, larger_leaf_splits,
       smaller_leaf_index, larger_leaf_index, is_smaller_leaf_valid, is_larger_leaf_valid,
diff --git a/src/treelearner/cuda/cuda_best_split_finder.hpp b/src/treelearner/cuda/cuda_best_split_finder.hpp
index 2d9940312533..09a70ddff05d 100644
--- a/src/treelearner/cuda/cuda_best_split_finder.hpp
+++ b/src/treelearner/cuda/cuda_best_split_finder.hpp
@@ -182,6 +182,7 @@ class CUDABestSplitFinder {
   data_size_t min_data_in_leaf_;
   double min_sum_hessian_in_leaf_;
   double min_gain_to_split_;
+  int max_depth_;  // taken from config->max_depth in the constructor
   double cat_smooth_;
   double cat_l2_;
   int max_cat_threshold_;
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 952ef52f8023..0f7d7910e2fe 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -227,6 +227,8 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
         smaller_leaf_index_, larger_leaf_index_,
         num_data_in_smaller_leaf, num_data_in_larger_leaf,
         sum_hessians_in_smaller_leaf, sum_hessians_in_larger_leaf,
+        tree->host_leaf_depth(smaller_leaf_index_),
+        tree->host_leaf_depth(larger_leaf_index_),
         cuda_gradient_discretizer_->grad_scale_ptr(),
         cuda_gradient_discretizer_->hess_scale_ptr(),
         smaller_leaf_num_bits_bin,
@@ -238,6 +240,8 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
         smaller_leaf_index_, larger_leaf_index_,
         num_data_in_smaller_leaf, num_data_in_larger_leaf,
         sum_hessians_in_smaller_leaf, sum_hessians_in_larger_leaf,
+        tree->host_leaf_depth(smaller_leaf_index_),
+        tree->host_leaf_depth(larger_leaf_index_),
         nullptr, nullptr, 0, 0);
     }