From f8f46f3c8d1806c22c8ff083792b9e30b21b3fed Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 20 Aug 2025 21:15:27 -0500
Subject: [PATCH 1/4] [ci] [c++] use 'pre-commit' to run 'cpplint', upgrade to 'cpplint' 2.0.2

---
 .ci/{lint-cpp.sh => check-omp-pragmas.sh}  |  7 -----
 .ci/test.sh                                |  3 --
 .pre-commit-config.yaml                    | 16 +++++++++++
 include/LightGBM/dataset.h                 | 16 ++++++++---
 include/LightGBM/utils/array_args.h        | 28 +++++++++++++++----
 include/LightGBM/utils/common.h            | 19 ++++++++++---
 include/LightGBM/utils/openmp_wrapper.h    |  8 ++++--
 include/LightGBM/utils/text_reader.h       |  8 ++++--
 src/application/application.cpp            |  8 ++++--
 src/boosting/gbdt.cpp                      | 16 ++++++++---
 src/boosting/goss.hpp                      |  4 ++-
 src/boosting/rf.hpp                        |  4 ++-
 src/c_api.cpp                              | 28 ++++++++++++++-----
 src/io/bin.cpp                             |  4 ++-
 src/io/dataset_loader.cpp                  | 20 +++++++++----
 src/io/metadata.cpp                        | 28 ++++++++++++++-----
 src/metric/dcg_calculator.cpp              | 16 ++++++++---
 src/metric/map_metric.hpp                  |  4 ++-
 src/network/linker_topo.cpp                |  4 ++-
 src/objective/rank_objective.hpp           | 12 ++++++--
 src/treelearner/col_sampler.hpp            |  4 ++-
 .../data_parallel_tree_learner.cpp         |  4 ++-
 .../feature_parallel_tree_learner.cpp      |  4 ++-
 src/treelearner/gpu_tree_learner.cpp       | 10 ++++---
 src/treelearner/serial_tree_learner.cpp    |  8 ++++--
 .../voting_parallel_tree_learner.cpp       | 16 ++++++++---
 26 files changed, 221 insertions(+), 78 deletions(-)
 rename .ci/{lint-cpp.sh => check-omp-pragmas.sh} (82%)

diff --git a/.ci/lint-cpp.sh b/.ci/check-omp-pragmas.sh
similarity index 82%
rename from .ci/lint-cpp.sh
rename to .ci/check-omp-pragmas.sh
index be9c71857e28..42c2e52e0450 100755
--- a/.ci/lint-cpp.sh
+++ b/.ci/check-omp-pragmas.sh
@@ -2,13 +2,6 @@
 
 set -e -E -u -o pipefail
 
-echo "running cpplint"
-cpplint \
-    --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length \
-    --recursive ./src ./include ./R-package ./swig ./tests \
-|| exit 1
-echo "done running cpplint"
-
 echo "checking that all OpenMP pragmas specify num_threads()"
 get_omp_pragmas_without_num_threads() {
     grep \
diff --git a/.ci/test.sh b/.ci/test.sh
index d3cbe36e9908..b02de747ebdd 100755
--- a/.ci/test.sh
+++ b/.ci/test.sh
@@ -105,7 +105,6 @@ if [[ $TASK == "lint" ]]; then
     conda create -q -y -n "${CONDA_ENV}" \
         "${CONDA_PYTHON_REQUIREMENT}" \
         'biome>=1.9.3' \
-        'cpplint>=1.6.0' \
         'matplotlib-base>=3.9.1' \
         'mypy>=1.11.1' \
         'pre-commit>=3.8.0' \
@@ -118,8 +117,6 @@ if [[ $TASK == "lint" ]]; then
     bash ./.ci/run-pre-commit-mypy.sh || exit 1
     echo "Linting R code"
     Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1
-    echo "Linting C++ code"
-    bash ./.ci/lint-cpp.sh || exit 1
     echo "Linting JavaScript code"
     bash ./.ci/lint-js.sh || exit 1
     exit 0
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 567aa92ecc72..4ec0009476f3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,22 @@ repos:
     hooks:
       - id: cmakelint
        args: ["--linelength=120", "--filter=-convention/filename,-package/stdargs,-readability/wonkycase"]
+  - repo: https://github.com/cpplint/cpplint
+    rev: '2.0.2'
+    hooks:
+      - id: cpplint
+        args:
+          - --recursive
+          - --filter=-build/c++11,-build/include_subdir,-build/include_what_you_use,-build/header_guard,-whitespace/indent_namespace,-whitespace/line_length
+  - repo: local
+    hooks:
+      - id: check-omp-pragmas
+        name: check-omp-pragmas
+        entry: sh
+        args:
+          - .ci/check-omp-pragmas.sh
+        language: system
+        pass_filenames: false
   - repo: https://github.com/adrienverge/yamllint
     rev: v1.37.1
     hooks:
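
[note: with the two hooks registered above, the checks the CI "lint" task performs should also be reproducible locally. A minimal sketch, assuming `pre-commit>=3.8.0` (the floor already pinned in .ci/test.sh) is installed; the hook ids come from the `id:` fields above:

    # run a single hook against every file in the repo, not just staged files
    pre-commit run cpplint --all-files
    pre-commit run check-omp-pragmas --all-files

`pre-commit run <hook-id> --all-files` is standard pre-commit CLI usage.]
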
diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h
index c2a4b62296f2..b6108c0bbdea 100644
--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -554,9 +554,13 @@ class Dataset {
   }
 
   inline void FinishOneRow(int tid, data_size_t row_idx, const std::vector<bool>& is_feature_added) {
-    if (is_finish_load_) { return; }
+    if (is_finish_load_) {
+      return;
+    }
     for (auto fidx : feature_need_push_zeros_) {
-      if (is_feature_added[fidx]) { continue; }
+      if (is_feature_added[fidx]) {
+        continue;
+      }
       const int group = feature2group_[fidx];
       const int sub_feature = feature2subfeature_[fidx];
       feature_groups_[group]->PushData(tid, sub_feature, row_idx, 0.0f);
@@ -587,10 +591,14 @@ class Dataset {
   }
 
   inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<std::pair<int, double>>& feature_values) {
-    if (is_finish_load_) { return; }
+    if (is_finish_load_) {
+      return;
+    }
     std::vector<bool> is_feature_added(num_features_, false);
     for (auto& inner_data : feature_values) {
-      if (inner_data.first >= num_total_features_) { continue; }
+      if (inner_data.first >= num_total_features_) {
+        continue;
+      }
       int feature_idx = used_feature_map_[inner_data.first];
       if (feature_idx >= 0) {
         is_feature_added[feature_idx] = true;
diff --git a/include/LightGBM/utils/array_args.h b/include/LightGBM/utils/array_args.h
index 3c590bad4030..a21ddab8f59d 100644
--- a/include/LightGBM/utils/array_args.h
+++ b/include/LightGBM/utils/array_args.h
@@ -112,17 +112,33 @@ class ArrayArgs {
     VAL_T v = ref[end - 1];
     for (;;) {
       while (ref[++i] > v) {}
-      while (v > ref[--j]) { if (j == start) { break; } }
-      if (i >= j) { break; }
+      while (v > ref[--j]) {
+        if (j == start) {
+          break;
+        }
+      }
+      if (i >= j) {
+        break;
+      }
       std::swap(ref[i], ref[j]);
-      if (ref[i] == v) { p++; std::swap(ref[p], ref[i]); }
-      if (v == ref[j]) { q--; std::swap(ref[j], ref[q]); }
+      if (ref[i] == v) {
+        p++;
+        std::swap(ref[p], ref[i]);
+      }
+      if (v == ref[j]) {
+        q--;
+        std::swap(ref[j], ref[q]);
+      }
     }
     std::swap(ref[i], ref[end - 1]);
     j = i - 1;
     i = i + 1;
-    for (int k = start; k <= p; k++, j--) { std::swap(ref[k], ref[j]); }
-    for (int k = end - 2; k >= q; k--, i++) { std::swap(ref[i], ref[k]); }
+    for (int k = start; k <= p; k++, j--) {
+      std::swap(ref[k], ref[j]);
+    }
+    for (int k = end - 2; k >= q; k--, i++) {
+      std::swap(ref[i], ref[k]);
+    }
     *l = j;
     *r = i;
   }
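
[note: most of the C++ churn in this commit is the same mechanical fix repeated — cpplint 2.0.2 flags a controlled statement that shares a line with its braces, so each `if (cond) { stmt; }` one-liner is unfolded onto separate lines. A rough local repro sketch; the file name and exact message wording are illustrative, not verbatim:

    cat > /tmp/brace_demo.cc <<'EOF'
    void Demo(bool done) {
      if (done) { return; }  // controlled statement on the same line as the braces
    }
    EOF
    cpplint --filter=-legal/copyright /tmp/brace_demo.cc
    # expect a warning along the lines of:
    #   Controlled statements inside brackets of if clause should be
    #   on a separate line  [whitespace/newline]

The `-legal/copyright` filter only silences the missing-header warning for this throwaway file.]
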
diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h
index 67bc07b0ecd5..90610ac98168 100644
--- a/include/LightGBM/utils/common.h
+++ b/include/LightGBM/utils/common.h
@@ -315,9 +315,18 @@ inline static const char* Atof(const char* p, double* out) {
     }
     if (expon > 308) expon = 308;
     // Calculate scaling factor.
-    while (expon >= 50) { scale *= 1E50; expon -= 50; }
-    while (expon >= 8) { scale *= 1E8; expon -= 8; }
-    while (expon > 0) { scale *= 10.0; expon -= 1; }
+    while (expon >= 50) {
+      scale *= 1E50;
+      expon -= 50;
+    }
+    while (expon >= 8) {
+      scale *= 1E8;
+      expon -= 8;
+    }
+    while (expon > 0) {
+      scale *= 10.0;
+      expon -= 1;
+    }
   }
   // Return signed and scaled floating point result.
   *out = sign * (frac ? (value / scale) : (value * scale));
@@ -713,7 +722,9 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
       size_t mid = left + s;
       size_t right = mid + s;
       right = std::min(len, right);
-      if (mid >= right) { continue; }
+      if (mid >= right) {
+        continue;
+      }
       std::copy(_First + left, _First + mid, buf + left);
       std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred);
     }
diff --git a/include/LightGBM/utils/openmp_wrapper.h b/include/LightGBM/utils/openmp_wrapper.h
index b9a8ea2982fc..689f27c9f4a5 100644
--- a/include/LightGBM/utils/openmp_wrapper.h
+++ b/include/LightGBM/utils/openmp_wrapper.h
@@ -62,9 +62,13 @@ class ThreadExceptionHelper {
   }
   void CaptureException() {
     // only catch first exception.
-    if (ex_ptr_ != nullptr) { return; }
+    if (ex_ptr_ != nullptr) {
+      return;
+    }
     std::unique_lock<std::mutex> guard(lock_);
-    if (ex_ptr_ != nullptr) { return; }
+    if (ex_ptr_ != nullptr) {
+      return;
+    }
     ex_ptr_ = std::current_exception();
   }
diff --git a/include/LightGBM/utils/text_reader.h b/include/LightGBM/utils/text_reader.h
index ccb25f960d05..b48174542e58 100644
--- a/include/LightGBM/utils/text_reader.h
+++ b/include/LightGBM/utils/text_reader.h
@@ -124,7 +124,9 @@ class TextReader {
           ++i;
           ++total_cnt;
           // skip end of line
-          while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
+          while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
+            ++i;
+          }
           last_i = i;
         } else {
           ++i;
@@ -284,7 +286,9 @@ class TextReader {
           ++i;
           ++total_cnt;
           // skip end of line
-          while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
+          while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
+            ++i;
+          }
           last_i = i;
         } else {
           ++i;
diff --git a/src/application/application.cpp b/src/application/application.cpp
index 42f707f0c801..7dcb182fb981 100644
--- a/src/application/application.cpp
+++ b/src/application/application.cpp
@@ -121,7 +121,9 @@ void Application::LoadData() {
   if (config_.is_provide_training_metric) {
     for (auto metric_type : config_.metric) {
       auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
-      if (metric == nullptr) { continue; }
+      if (metric == nullptr) {
+        continue;
+      }
       metric->Init(train_data_->metadata(), train_data_->num_data());
       train_metric_.push_back(std::move(metric));
     }
@@ -149,7 +151,9 @@ void Application::LoadData() {
       valid_metrics_.emplace_back();
       for (auto metric_type : config_.metric) {
         auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
-        if (metric == nullptr) { continue; }
+        if (metric == nullptr) {
+          continue;
+        }
         metric->Init(valid_datas_.back()->metadata(),
                      valid_datas_.back()->num_data());
         valid_metrics_.back().push_back(std::move(metric));
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 737c69072b64..5bfa8d6605d4 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -210,7 +210,9 @@ void GBDT::AddValidDataset(const Dataset* valid_data,
 
   if (early_stopping_round_ > 0) {
     auto num_metrics = valid_metrics.size();
-    if (es_first_metric_only_) { num_metrics = 1; }
+    if (es_first_metric_only_) {
+      num_metrics = 1;
+    }
     best_iter_.emplace_back(num_metrics, 0);
     best_score_.emplace_back(num_metrics, kMinScore);
     best_msg_.emplace_back(num_metrics);
@@ -452,7 +454,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
 }
 
 void GBDT::RollbackOneIter() {
-  if (iter_ <= 0) { return; }
+  if (iter_ <= 0) {
+    return;
+  }
   // reset score
   for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
     auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
@@ -588,7 +592,9 @@ std::string GBDT::OutputMetric(int iter) {
         msg_buf << tmp_buf.str() << '\n';
       }
     }
-    if (es_first_metric_only_ && j > 0) { continue; }
+    if (es_first_metric_only_ && j > 0) {
+      continue;
+    }
     if (ret.empty() && early_stopping_round_ > 0) {
       auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
       if (cur_score - best_score_[i][j] > early_stopping_min_delta_) {
        best_score_[i][j] = cur_score;
         best_iter_[i][j] = iter;
         meet_early_stopping_pairs.emplace_back(i, j);
       } else {
-        if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
+        if (iter - best_iter_[i][j] >= early_stopping_round_) {
+          ret = best_msg_[i][j];
+        }
       }
     }
   }
diff --git a/src/boosting/goss.hpp b/src/boosting/goss.hpp
index 9d55d89ee097..a853cc3c35c6 100644
--- a/src/boosting/goss.hpp
+++ b/src/boosting/goss.hpp
@@ -30,7 +30,9 @@ class GOSSStrategy : public SampleStrategy {
   void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
     bag_data_cnt_ = num_data_;
     // not subsample for first iterations
-    if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
+    if (iter < static_cast<int>(1.0f / config_->learning_rate)) {
+      return;
+    }
     auto left_cnt = bagging_runner_.Run<true>(
         num_data_,
         [=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index 3eb065b30f2d..8b20b3a75310 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -182,7 +182,9 @@ class RF : public GBDT {
   }
 
   void RollbackOneIter() override {
-    if (iter_ <= 0) { return; }
+    if (iter_ <= 0) {
+      return;
+    }
     int cur_iter = iter_ + num_init_iteration_ - 1;
     // reset score
     for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
diff --git a/src/c_api.cpp b/src/c_api.cpp
index e5372646664c..882494f35dc0 100644
--- a/src/c_api.cpp
+++ b/src/c_api.cpp
@@ -218,7 +218,9 @@ class Booster {
     for (auto metric_type : config_.metric) {
       auto metric = std::unique_ptr<Metric>(
           Metric::CreateMetric(metric_type, config_));
-      if (metric == nullptr) { continue; }
+      if (metric == nullptr) {
+        continue;
+      }
       metric->Init(train_data_->metadata(), train_data_->num_data());
       train_metric_.push_back(std::move(metric));
     }
@@ -394,7 +396,9 @@ class Booster {
     valid_metrics_.emplace_back();
     for (auto metric_type : config_.metric) {
       auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
-      if (metric == nullptr) { continue; }
+      if (metric == nullptr) {
+        continue;
+      }
       metric->Init(valid_data->metadata(), valid_data->num_data());
       valid_metrics_.back().push_back(std::move(metric));
     }
@@ -1603,7 +1607,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
     OMP_LOOP_EX_BEGIN();
     const int tid = omp_get_thread_num();
     int feature_idx = ret->InnerFeatureIndex(i);
-    if (feature_idx < 0) { continue; }
+    if (feature_idx < 0) {
+      continue;
+    }
     int group = ret->Feature2Group(feature_idx);
     int sub_feature = ret->Feture2SubFeature(feature_idx);
     CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
@@ -1614,7 +1620,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
         auto pair = col_it.NextNonZero();
         row_idx = pair.first;
         // no more data
-        if (row_idx < 0) { break; }
+        if (row_idx < 0) {
+          break;
+        }
         ret->PushOneData(tid, row_idx, group, feature_idx, sub_feature, pair.second);
       }
     } else {
@@ -1838,7 +1846,9 @@ int LGBM_DatasetSetField(DatasetHandle handle,
   } else if (type == C_API_DTYPE_FLOAT64) {
     is_success = dataset->SetDoubleField(field_name, reinterpret_cast<const double*>(field_data),
                                          static_cast<int32_t>(num_element));
   }
-  if (!is_success) { Log::Fatal("Input data type error or field not found"); }
+  if (!is_success) {
+    Log::Fatal("Input data type error or field not found");
+  }
   API_END();
 }
@@ -1875,8 +1885,12 @@ int LGBM_DatasetGetField(DatasetHandle handle,
     *out_type = C_API_DTYPE_FLOAT64;
     is_success = true;
   }
-  if (!is_success) { Log::Fatal("Field not found"); }
-  if (*out_ptr == nullptr) { *out_len = 0; }
+  if (!is_success) {
+    Log::Fatal("Field not found");
+  }
+  if (*out_ptr == nullptr) {
+    *out_len = 0;
+  }
   API_END();
 }
diff --git a/src/io/bin.cpp b/src/io/bin.cpp
index 3d84599e6589..23434d1932c9 100644
--- a/src/io/bin.cpp
+++ b/src/io/bin.cpp
@@ -131,7 +131,9 @@ namespace LightGBM {
       upper_bounds[bin_cnt] = distinct_values[i];
       ++bin_cnt;
       lower_bounds[bin_cnt] = distinct_values[i + 1];
-      if (bin_cnt >= max_bin - 1) { break; }
+      if (bin_cnt >= max_bin - 1) {
+        break;
+      }
       cur_cnt_inbin = 0;
       if (!is_big_count_value[i]) {
         --rest_bin_cnt;
diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp
index 9c8a0417b118..4931a93d826e 100644
--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -665,7 +665,9 @@ Dataset* DatasetLoader::ConstructFromSampleData(double** sample_values,
   std::vector<int> start(num_machines);
   std::vector<int> len(num_machines);
   int step = (num_total_features + num_machines - 1) / num_machines;
-  if (step < 1) { step = 1; }
+  if (step < 1) {
+    step = 1;
+  }
 
   start[0] = 0;
   for (int i = 0; i < num_machines - 1; ++i) {
@@ -1168,7 +1170,9 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
     std::vector<int> start(num_machines);
     std::vector<int> len(num_machines);
     int step = (dataset->num_total_features_ + num_machines - 1) / num_machines;
-    if (step < 1) { step = 1; }
+    if (step < 1) {
+      step = 1;
+    }
 
     start[0] = 0;
     for (int i = 0; i < num_machines - 1; ++i) {
@@ -1284,7 +1288,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
       std::vector<bool> is_feature_added(dataset->num_features_, false);
       // push data
       for (auto& inner_data : oneline_features) {
-        if (inner_data.first >= dataset->num_total_features_) { continue; }
+        if (inner_data.first >= dataset->num_total_features_) {
+          continue;
+        }
         int feature_idx = dataset->used_feature_map_[inner_data.first];
         if (feature_idx >= 0) {
           is_feature_added[feature_idx] = true;
@@ -1341,7 +1347,9 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector<std::string>* text_dat
       // push data
       std::vector<bool> is_feature_added(dataset->num_features_, false);
       for (auto& inner_data : oneline_features) {
-        if (inner_data.first >= dataset->num_total_features_) { continue; }
+        if (inner_data.first >= dataset->num_total_features_) {
+          continue;
+        }
         int feature_idx = dataset->used_feature_map_[inner_data.first];
         if (feature_idx >= 0) {
           is_feature_added[feature_idx] = true;
@@ -1414,7 +1422,9 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
       std::vector<bool> is_feature_added(dataset->num_features_, false);
       // push data
       for (auto& inner_data : oneline_features) {
-        if (inner_data.first >= dataset->num_total_features_) { continue; }
+        if (inner_data.first >= dataset->num_total_features_) {
+          continue;
+        }
         int feature_idx = dataset->used_feature_map_[inner_data.first];
         if (feature_idx >= 0) {
           is_feature_added[feature_idx] = true;
diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp
index f6f07c434661..d5504454cdf7 100644
--- a/src/io/metadata.cpp
+++ b/src/io/metadata.cpp
@@ -56,7 +56,9 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) {
       Log::Info("Using query id in data file, ignoring the additional query file");
       query_boundaries_.clear();
     }
-    if (!query_weights_.empty()) { query_weights_.clear(); }
+    if (!query_weights_.empty()) {
+      query_weights_.clear();
+    }
     queries_ = std::vector<data_size_t>(num_data_, 0);
     query_load_from_file_ = false;
   }
@@ -401,7 +403,9 @@ void Metadata::InsertInitScores(const double* init_scores, data_size_t start_ind
     // Note that len here is row count, not num_init_score, so we compare against num_data
     Log::Fatal("Inserted initial score data is too large for dataset");
   }
-  if (init_score_.empty()) { init_score_.resize(num_init_score_); }
+  if (init_score_.empty()) {
+    init_score_.resize(num_init_score_);
+  }
 
   int nclasses = num_init_score_classes();
 
@@ -455,7 +459,9 @@ void Metadata::InsertLabels(const label_t* labels, data_size_t start_index, data
   if (start_index + len > num_data_) {
     Log::Fatal("Inserted label data is too large for dataset");
   }
-  if (label_.empty()) { label_.resize(num_data_); }
+  if (label_.empty()) {
+    label_.resize(num_data_);
+  }
 
   memcpy(label_.data() + start_index, labels, sizeof(label_t) * len);
 
@@ -511,7 +517,9 @@ void Metadata::InsertWeights(const label_t* weights, data_size_t start_index, da
   if (start_index + len > num_weights_) {
     Log::Fatal("Inserted weight data is too large for dataset");
  }
-  if (weights_.empty()) { weights_.resize(num_weights_); }
+  if (weights_.empty()) {
+    weights_.resize(num_weights_);
+  }
 
   memcpy(weights_.data() + start_index, weights, sizeof(label_t) * len);
 
@@ -797,20 +805,26 @@ void Metadata::LoadFromMemory(const void* memory) {
   num_queries_ = *(reinterpret_cast<const data_size_t*>(mem_ptr));
   mem_ptr += VirtualFileWriter::AlignedSize(sizeof(num_queries_));
 
-  if (!label_.empty()) { label_.clear(); }
+  if (!label_.empty()) {
+    label_.clear();
+  }
   label_ = std::vector<label_t>(num_data_);
   std::memcpy(label_.data(), mem_ptr, sizeof(label_t) * num_data_);
   mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_data_);
 
   if (num_weights_ > 0) {
-    if (!weights_.empty()) { weights_.clear(); }
+    if (!weights_.empty()) {
+      weights_.clear();
+    }
     weights_ = std::vector<label_t>(num_weights_);
     std::memcpy(weights_.data(), mem_ptr, sizeof(label_t) * num_weights_);
     mem_ptr += VirtualFileWriter::AlignedSize(sizeof(label_t) * num_weights_);
     weight_load_from_file_ = true;
   }
   if (num_queries_ > 0) {
-    if (!query_boundaries_.empty()) { query_boundaries_.clear(); }
+    if (!query_boundaries_.empty()) {
+      query_boundaries_.clear();
+    }
     query_boundaries_ = std::vector<data_size_t>(num_queries_ + 1);
     std::memcpy(query_boundaries_.data(), mem_ptr, sizeof(data_size_t) * (num_queries_ + 1));
     mem_ptr += VirtualFileWriter::AlignedSize(sizeof(data_size_t) *
diff --git a/src/metric/dcg_calculator.cpp b/src/metric/dcg_calculator.cpp
index 316fdf0a6ddf..6af35c657e99 100644
--- a/src/metric/dcg_calculator.cpp
+++ b/src/metric/dcg_calculator.cpp
@@ -31,7 +31,9 @@ void DCGCalculator::DefaultEvalAt(std::vector<int>* eval_at) {
 }
 
 void DCGCalculator::DefaultLabelGain(std::vector<double>* label_gain) {
-  if (!label_gain->empty()) { return; }
+  if (!label_gain->empty()) {
+    return;
+  }
   // label_gain = 2^i - 1, may overflow, so we use 31 here
   const int max_label = 31;
   label_gain->push_back(0.0f);
@@ -60,7 +62,9 @@ double DCGCalculator::CalMaxDCGAtK(data_size_t k, const label_t* label, data_siz
   }
 
   int top_label = static_cast<int>(label_gain_.size()) - 1;
-  if (k > num_data) { k = num_data; }
+  if (k > num_data) {
+    k = num_data;
+  }
   // start from top label, and accumulate DCG
   for (data_size_t j = 0; j < k; ++j) {
     while (top_label > 0 && label_cnt[top_label] <= 0) {
@@ -90,7 +94,9 @@ void DCGCalculator::CalMaxDCG(const std::vector<data_size_t>& ks,
   // calculate k Max DCG by one pass
   for (size_t i = 0; i < ks.size(); ++i) {
     data_size_t cur_k = ks[i];
-    if (cur_k > num_data) { cur_k = num_data; }
+    if (cur_k > num_data) {
+      cur_k = num_data;
+    }
     for (data_size_t j = cur_left; j < cur_k; ++j) {
       while (top_label > 0 && label_cnt[top_label] <= 0) {
         top_label -= 1;
@@ -121,7 +127,9 @@ void DCGCalculator::CalDCG(const std::vector<data_size_t>& ks, const label_t* la
   // calculate multi dcg by one pass
   for (size_t i = 0; i < ks.size(); ++i) {
     data_size_t cur_k = ks[i];
-    if (cur_k > num_data) { cur_k = num_data; }
+    if (cur_k > num_data) {
+      cur_k = num_data;
+    }
     for (data_size_t j = cur_left; j < cur_k; ++j) {
       data_size_t idx = sorted_idx[j];
       cur_result += label_gain_[static_cast<int>(label[idx])] * discount_[j];
diff --git a/src/metric/map_metric.hpp b/src/metric/map_metric.hpp
index 76fb4fe8a3e9..3f79dcc51888 100644
--- a/src/metric/map_metric.hpp
+++ b/src/metric/map_metric.hpp
@@ -86,7 +86,9 @@ class MapMetric:public Metric {
     data_size_t cur_left = 0;
     for (size_t i = 0; i < ks.size(); ++i) {
       data_size_t cur_k = static_cast<data_size_t>(ks[i]);
-      if (cur_k > num_data) { cur_k = num_data; }
+      if (cur_k > num_data) {
+        cur_k = num_data;
+      }
       for (data_size_t j = cur_left; j < cur_k; ++j) {
         data_size_t idx = sorted_idx[j];
         if (label[idx] > 0.5f) {
diff --git a/src/network/linker_topo.cpp b/src/network/linker_topo.cpp
index af46ef4f494e..6823ec6064b0 100644
--- a/src/network/linker_topo.cpp
+++ b/src/network/linker_topo.cpp
@@ -68,7 +68,9 @@ RecursiveHalvingMap::RecursiveHalvingMap(int in_k, RecursiveHalvingNodeType _typ
 RecursiveHalvingMap RecursiveHalvingMap::Construct(int rank, int num_machines) {
   // construct all recursive halving map for all machines
   int k = 0;
-  while ((1 << k) <= num_machines) { ++k; }
+  while ((1 << k) <= num_machines) {
+    ++k;
+  }
   // let 1 << k <= num_machines
   --k;
   // distance of each communication
diff --git a/src/objective/rank_objective.hpp b/src/objective/rank_objective.hpp
index 8227c7b65658..61d560bb6af8 100644
--- a/src/objective/rank_objective.hpp
+++ b/src/objective/rank_objective.hpp
@@ -206,11 +206,17 @@ class LambdarankNDCG : public RankingObjective {
     double sum_lambdas = 0.0;
     // start accumulate lambdas by pairs that contain at least one document above truncation level
     for (data_size_t i = 0; i < cnt - 1 && i < truncation_level_; ++i) {
-      if (score[sorted_idx[i]] == kMinScore) { continue; }
+      if (score[sorted_idx[i]] == kMinScore) {
+        continue;
+      }
       for (data_size_t j = i + 1; j < cnt; ++j) {
-        if (score[sorted_idx[j]] == kMinScore) { continue; }
+        if (score[sorted_idx[j]] == kMinScore) {
+          continue;
+        }
         // skip pairs with the same labels
-        if (label[sorted_idx[i]] == label[sorted_idx[j]]) { continue; }
+        if (label[sorted_idx[i]] == label[sorted_idx[j]]) {
+          continue;
+        }
         data_size_t high_rank, low_rank;
         if (label[sorted_idx[i]] > label[sorted_idx[j]]) {
           high_rank = i;
diff --git a/src/treelearner/col_sampler.hpp b/src/treelearner/col_sampler.hpp
index c70b07e50efa..0151337965f1 100644
--- a/src/treelearner/col_sampler.hpp
+++ b/src/treelearner/col_sampler.hpp
@@ -100,7 +100,9 @@ class ColSampler {
         allowed_features.insert(constraint.begin(), constraint.end());
       }
       for (int feat : branch_features) {
-        if (constraint.count(feat) == 0) { break; }
+        if (constraint.count(feat) == 0) {
+          break;
+        }
        ++num_feat_found;
        if (num_feat_found == static_cast<int>(branch_features.size())) {
          allowed_features.insert(constraint.begin(), constraint.end());
diff --git a/src/treelearner/data_parallel_tree_learner.cpp b/src/treelearner/data_parallel_tree_learner.cpp
index 670788118455..deb74f9cde64 100644
--- a/src/treelearner/data_parallel_tree_learner.cpp
+++ b/src/treelearner/data_parallel_tree_learner.cpp
@@ -128,7 +128,9 @@ void DataParallelTreeLearner<TREE_LEARNER_TYPE>::BeforeTrain() {
   std::vector<int> num_bins_distributed(num_machines_, 0);
   for (int i = 0; i < this->train_data_->num_total_features(); ++i) {
     int inner_feature_index = this->train_data_->InnerFeatureIndex(i);
-    if (inner_feature_index == -1) { continue; }
+    if (inner_feature_index == -1) {
+      continue;
+    }
     if (this->col_sampler_.is_feature_used_bytree()[inner_feature_index]) {
       int cur_min_machine = static_cast<int>(ArrayArgs<int>::ArgMin(num_bins_distributed));
      feature_distribution[cur_min_machine].push_back(inner_feature_index);
diff --git a/src/treelearner/feature_parallel_tree_learner.cpp b/src/treelearner/feature_parallel_tree_learner.cpp
index c5202f3d706d..4d5324d8fd51 100644
--- a/src/treelearner/feature_parallel_tree_learner.cpp
+++ b/src/treelearner/feature_parallel_tree_learner.cpp
@@ -42,7 +42,9 @@ void FeatureParallelTreeLearner<TREE_LEARNER_TYPE>::BeforeTrain() {
   std::vector<int> num_bins_distributed(num_machines_, 0);
   for (int i = 0; i < this->train_data_->num_total_features(); ++i) {
     int inner_feature_index = this->train_data_->InnerFeatureIndex(i);
-    if (inner_feature_index == -1) { continue; }
+    if (inner_feature_index == -1) {
+      continue;
+    }
     if (this->col_sampler_.is_feature_used_bytree()[inner_feature_index]) {
       int cur_min_machine = static_cast<int>(ArrayArgs<int>::ArgMin(num_bins_distributed));
       feature_distribution[cur_min_machine].push_back(inner_feature_index);
diff --git a/src/treelearner/gpu_tree_learner.cpp b/src/treelearner/gpu_tree_learner.cpp
index 1bf21d65ccc6..8d0bdc5549d4 100644
--- a/src/treelearner/gpu_tree_learner.cpp
+++ b/src/treelearner/gpu_tree_learner.cpp
@@ -245,7 +245,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
   }
   // allocate memory for all features (FIXME: 4 GB barrier on some devices, need to split to multiple buffers)
   device_features_.reset();
-  device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>((uint64_t)num_dense_feature4_ * num_data_, ctx_));
+  device_features_ = std::unique_ptr<boost::compute::vector<Feature4>>(new boost::compute::vector<Feature4>(static_cast<uint64_t>(num_dense_feature4_) * num_data_, ctx_));
   // unpin old buffer if necessary before destructing them
   if (ptr_pinned_gradients_) {
     queue_.enqueue_unmap_buffer(pinned_gradients_, ptr_pinned_gradients_);
@@ -427,7 +427,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
     }
     #pragma omp critical
     queue_.enqueue_write_buffer(device_features_->get_buffer(),
-                        (uint64_t)i * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
+                        static_cast<uint64_t>(i) * num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
     #if GPU_DEBUG >= 1
     printf("first example of feature-group tuple is: %d %d %d %d\n", host4[0].s[0], host4[0].s[1], host4[0].s[2], host4[0].s[3]);
     printf("Feature-groups copied to device with multipliers ");
@@ -503,7 +503,7 @@ void GPUTreeLearner::AllocateGPUMemory() {
   }
   // copying the last 1 to (dword_features - 1) feature-groups in the last tuple
   queue_.enqueue_write_buffer(device_features_->get_buffer(),
-                       (num_dense_feature4_ - 1) * (uint64_t)num_data_ * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
+                       (num_dense_feature4_ - 1) * static_cast<uint64_t>(num_data_) * sizeof(Feature4), num_data_ * sizeof(Feature4), host4);
   #if GPU_DEBUG >= 1
   printf("Last features copied to device\n");
   #endif
@@ -1089,7 +1089,9 @@ void GPUTreeLearner::FindBestSplits(const Tree* tree) {
     size_t bin_size = train_data_->FeatureNumBin(feature_index) + 1;
     printf("Feature %d smaller leaf:\n", feature_index);
     PrintHistograms(smaller_leaf_histogram_array_[feature_index].RawData() - kHistOffset, bin_size);
-    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->leaf_index() < 0) { continue; }
+    if (larger_leaf_splits_ == nullptr || larger_leaf_splits_->leaf_index() < 0) {
+      continue;
+    }
     printf("Feature %d larger leaf:\n", feature_index);
     PrintHistograms(larger_leaf_histogram_array_[feature_index].RawData() - kHistOffset, bin_size);
   }
diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp
index 01cdd7623c02..4d62ae370791 100644
--- a/src/treelearner/serial_tree_learner.cpp
+++ b/src/treelearner/serial_tree_learner.cpp
@@ -368,12 +368,16 @@ bool SerialTreeLearner::BeforeFindBestSplit(const Tree* tree, int left_leaf, int
     larger_leaf_histogram_array_ = nullptr;
   } else if (num_data_in_left_child < num_data_in_right_child) {
     // put parent(left) leaf's histograms into larger leaf's histograms
-    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
+    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) {
+      parent_leaf_histogram_array_ = larger_leaf_histogram_array_;
+    }
     histogram_pool_.Move(left_leaf, right_leaf);
     histogram_pool_.Get(left_leaf, &smaller_leaf_histogram_array_);
   } else {
     // put parent(left) leaf's histograms to larger leaf's histograms
-    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) { parent_leaf_histogram_array_ = larger_leaf_histogram_array_; }
+    if (histogram_pool_.Get(left_leaf, &larger_leaf_histogram_array_)) {
+      parent_leaf_histogram_array_ = larger_leaf_histogram_array_;
+    }
     histogram_pool_.Get(right_leaf, &smaller_leaf_histogram_array_);
   }
   return true;
diff --git a/src/treelearner/voting_parallel_tree_learner.cpp b/src/treelearner/voting_parallel_tree_learner.cpp
index 37f2d4cf2641..f18340634496 100644
--- a/src/treelearner/voting_parallel_tree_learner.cpp
+++ b/src/treelearner/voting_parallel_tree_learner.cpp
@@ -268,7 +268,9 @@ void VotingParallelTreeLearner<TREE_LEARNER_TYPE>::FindBestSplits(const Tree* tree)
   #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
     OMP_LOOP_EX_BEGIN();
-    if (!is_feature_used[feature_index]) { continue; }
+    if (!is_feature_used[feature_index]) {
+      continue;
+    }
     const BinMapper* feature_bin_mapper = this->train_data_->FeatureBinMapper(feature_index);
     const int num_bin = feature_bin_mapper->num_bin();
     const int offset = static_cast<int>(feature_bin_mapper->GetMostFreqBin() == 0);
@@ -288,7 +290,9 @@ void VotingParallelTreeLearner<TREE_LEARNER_TYPE>::FindBestSplits(const Tree* tree)
   #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
     OMP_LOOP_EX_BEGIN();
-    if (!is_feature_used[feature_index]) { continue; }
+    if (!is_feature_used[feature_index]) {
+      continue;
+    }
     const BinMapper* feature_bin_mapper = this->train_data_->FeatureBinMapper(feature_index);
     const int num_bin = feature_bin_mapper->num_bin();
     const int offset = static_cast<int>(feature_bin_mapper->GetMostFreqBin() == 0);
@@ -310,7 +314,9 @@ void VotingParallelTreeLearner<TREE_LEARNER_TYPE>::FindBestSplits(const Tree* tree)
  #pragma omp parallel for num_threads(OMP_NUM_THREADS()) schedule(static)
   for (int feature_index = 0; feature_index < this->num_features_; ++feature_index) {
     OMP_LOOP_EX_BEGIN();
-    if (!is_feature_used[feature_index]) { continue; }
+    if (!is_feature_used[feature_index]) {
+      continue;
+    }
     const int real_feature_index = this->train_data_->RealFeatureIndex(feature_index);
     this->train_data_->FixHistogram(feature_index,
                                     this->smaller_leaf_splits_->sum_gradients(), this->smaller_leaf_splits_->sum_hessians(),
@@ -323,7 +329,9 @@ void VotingParallelTreeLearner<TREE_LEARNER_TYPE>::FindBestSplits(const Tree* tree)
         &smaller_bestsplit_per_features[feature_index], smaller_leaf_parent_output);
     // only has root leaf
-    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->leaf_index() < 0) { continue; }
+    if (this->larger_leaf_splits_ == nullptr || this->larger_leaf_splits_->leaf_index() < 0) {
+      continue;
+    }
     if (use_subtract) {
       this->larger_leaf_histogram_array_[feature_index].Subtract(this->smaller_leaf_histogram_array_[feature_index]);
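
[note: the three follow-up commits below trim .ci/check-omp-pragmas.sh down to what POSIX sh guarantees. The new hook invokes the script via `entry: sh`, and `set -E` / `set -o pipefail` are bash/ksh extensions rather than POSIX. A quick sketch of the difference, assuming a strictly POSIX sh (e.g. an older dash) is available:

    sh -c 'set -o pipefail'    # many POSIX shells reject this: "set: Illegal option -o pipefail"
    bash -c 'set -o pipefail'  # accepted; pipefail is a bash/ksh/zsh extension
]
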
From eade0a9c3c9b905fccb5867e32a504ed229fcfe5 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 20 Aug 2025 22:18:52 -0500
Subject: [PATCH 2/4] remove bashisms

---
 .ci/check-omp-pragmas.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/check-omp-pragmas.sh b/.ci/check-omp-pragmas.sh
index 42c2e52e0450..e9b08632900c 100755
--- a/.ci/check-omp-pragmas.sh
+++ b/.ci/check-omp-pragmas.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-set -e -E -u -o pipefail
+set -e -u
 
 echo "checking that all OpenMP pragmas specify num_threads()"
 get_omp_pragmas_without_num_threads() {

From bb2dd5b2bd21c2708dd75b87cc011a86c170645d Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 20 Aug 2025 23:11:53 -0500
Subject: [PATCH 3/4] one more pipefail use

---
 .ci/check-omp-pragmas.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/check-omp-pragmas.sh b/.ci/check-omp-pragmas.sh
index e9b08632900c..8c7fcf084167 100755
--- a/.ci/check-omp-pragmas.sh
+++ b/.ci/check-omp-pragmas.sh
@@ -21,7 +21,7 @@ get_omp_pragmas_without_num_threads() {
 # consider this a failure and stop execution of the script.
 #
 # ref: https://www.gnu.org/software/grep/manual/html_node/Exit-Status.html
-set +e +o pipefail
+set +e
 PROBLEMATIC_LINES=$(
     get_omp_pragmas_without_num_threads
 )

From e7e7c64765f7de9e247726c5012bef7c78f43e35 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Wed, 20 Aug 2025 23:16:24 -0500
Subject: [PATCH 4/4] another pipefail

---
 .ci/check-omp-pragmas.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.ci/check-omp-pragmas.sh b/.ci/check-omp-pragmas.sh
index 8c7fcf084167..ac83b164fb0b 100755
--- a/.ci/check-omp-pragmas.sh
+++ b/.ci/check-omp-pragmas.sh
@@ -25,7 +25,7 @@ get_omp_pragmas_without_num_threads() {
 set +e
 PROBLEMATIC_LINES=$(
     get_omp_pragmas_without_num_threads
 )
-set -e -o pipefail
+set -e
 if test "${PROBLEMATIC_LINES}" != ""; then
     get_omp_pragmas_without_num_threads
     echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those."
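
[note: for context on the `set +e` / `set -e` bracket these last two commits keep intact — grep exits non-zero when it matches nothing, and an empty result is the passing case here. A minimal sketch of the failure mode being avoided (hypothetical input, same grep behavior):

    #!/bin/sh
    set -e
    printf '#pragma omp parallel for num_threads(OMP_NUM_THREADS())\n' \
        | grep -v 'num_threads' \
        | grep 'pragma omp parallel'
    # the final grep finds nothing and exits 1; under 'set -e' the script
    # would abort right here, even though zero matches is the desired outcome

Hence the pattern in the script: disable `-e`, capture the wrapper's output, re-enable `-e`, then test the captured string.]
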