Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions .ci/lint-cpp.sh → .ci/check-omp-pragmas.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
#!/bin/bash

set -e -E -u -o pipefail

echo "running cpplint"
cpplint \
--filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length \
--recursive ./src ./include ./R-package ./swig ./tests \
|| exit 1
echo "done running cpplint"
set -e -u

echo "checking that all OpenMP pragmas specify num_threads()"
get_omp_pragmas_without_num_threads() {
Expand All @@ -28,11 +21,11 @@ get_omp_pragmas_without_num_threads() {
# consider this a failure and stop execution of the script.
#
# ref: https://www.gnu.org/software/grep/manual/html_node/Exit-Status.html
set +e +o pipefail
set +e
PROBLEMATIC_LINES=$(
get_omp_pragmas_without_num_threads
)
set -e -o pipefail
set -e
if test "${PROBLEMATIC_LINES}" != ""; then
get_omp_pragmas_without_num_threads
echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those."
Expand Down
3 changes: 0 additions & 3 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ if [[ $TASK == "lint" ]]; then
conda create -q -y -n "${CONDA_ENV}" \
"${CONDA_PYTHON_REQUIREMENT}" \
'biome>=1.9.3' \
'cpplint>=1.6.0' \
'matplotlib-base>=3.9.1' \
'mypy>=1.11.1' \
'pre-commit>=3.8.0' \
Expand All @@ -118,8 +117,6 @@ if [[ $TASK == "lint" ]]; then
bash ./.ci/run-pre-commit-mypy.sh || exit 1
echo "Linting R code"
Rscript ./.ci/lint-r-code.R "${BUILD_DIRECTORY}" || exit 1
echo "Linting C++ code"
bash ./.ci/lint-cpp.sh || exit 1
echo "Linting JavaScript code"
bash ./.ci/lint-js.sh || exit 1
exit 0
Expand Down
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,22 @@ repos:
hooks:
- id: cmakelint
args: ["--linelength=120", "--filter=-convention/filename,-package/stdargs,-readability/wonkycase"]
- repo: https://github.com/cpplint/cpplint
rev: '2.0.2'
hooks:
- id: cpplint
args:
- --recursive
- --filter=-build/c++11,-build/include_subdir,-build/include_what_you_use,-build/header_guard,-whitespace/indent_namespace,-whitespace/line_length
- repo: local
hooks:
- id: check-omp-pragmas
name: check-omp-pragmas
entry: sh
args:
- .ci/check-omp-pragmas.sh
language: system
pass_filenames: false
- repo: https://github.com/adrienverge/yamllint
rev: v1.37.1
hooks:
Expand Down
16 changes: 12 additions & 4 deletions include/LightGBM/dataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,9 +554,13 @@ class Dataset {
}

inline void FinishOneRow(int tid, data_size_t row_idx, const std::vector<bool>& is_feature_added) {
if (is_finish_load_) { return; }
if (is_finish_load_) {
return;
}
for (auto fidx : feature_need_push_zeros_) {
if (is_feature_added[fidx]) { continue; }
if (is_feature_added[fidx]) {
continue;
}
const int group = feature2group_[fidx];
const int sub_feature = feature2subfeature_[fidx];
feature_groups_[group]->PushData(tid, sub_feature, row_idx, 0.0f);
Expand Down Expand Up @@ -587,10 +591,14 @@ class Dataset {
}

inline void PushOneRow(int tid, data_size_t row_idx, const std::vector<std::pair<int, double>>& feature_values) {
if (is_finish_load_) { return; }
if (is_finish_load_) {
return;
}
std::vector<bool> is_feature_added(num_features_, false);
for (auto& inner_data : feature_values) {
if (inner_data.first >= num_total_features_) { continue; }
if (inner_data.first >= num_total_features_) {
continue;
}
int feature_idx = used_feature_map_[inner_data.first];
if (feature_idx >= 0) {
is_feature_added[feature_idx] = true;
Expand Down
28 changes: 22 additions & 6 deletions include/LightGBM/utils/array_args.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,33 @@ class ArrayArgs {
VAL_T v = ref[end - 1];
for (;;) {
while (ref[++i] > v) {}
while (v > ref[--j]) { if (j == start) { break; } }
if (i >= j) { break; }
while (v > ref[--j]) {
if (j == start) {
break;
}
}
if (i >= j) {
break;
}
std::swap(ref[i], ref[j]);
if (ref[i] == v) { p++; std::swap(ref[p], ref[i]); }
if (v == ref[j]) { q--; std::swap(ref[j], ref[q]); }
if (ref[i] == v) {
p++;
std::swap(ref[p], ref[i]);
}
if (v == ref[j]) {
q--;
std::swap(ref[j], ref[q]);
}
}
std::swap(ref[i], ref[end - 1]);
j = i - 1;
i = i + 1;
for (int k = start; k <= p; k++, j--) { std::swap(ref[k], ref[j]); }
for (int k = end - 2; k >= q; k--, i++) { std::swap(ref[i], ref[k]); }
for (int k = start; k <= p; k++, j--) {
std::swap(ref[k], ref[j]);
}
for (int k = end - 2; k >= q; k--, i++) {
std::swap(ref[i], ref[k]);
}
*l = j;
*r = i;
}
Expand Down
19 changes: 15 additions & 4 deletions include/LightGBM/utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,18 @@ inline static const char* Atof(const char* p, double* out) {
}
if (expon > 308) expon = 308;
// Calculate scaling factor.
while (expon >= 50) { scale *= 1E50; expon -= 50; }
while (expon >= 8) { scale *= 1E8; expon -= 8; }
while (expon > 0) { scale *= 10.0; expon -= 1; }
while (expon >= 50) {
scale *= 1E50;
expon -= 50;
}
while (expon >= 8) {
scale *= 1E8;
expon -= 8;
}
while (expon > 0) {
scale *= 10.0;
expon -= 1;
}
}
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
Expand Down Expand Up @@ -713,7 +722,9 @@ static void ParallelSort(_RanIt _First, _RanIt _Last, _Pr _Pred, _VTRanIt*) {
size_t mid = left + s;
size_t right = mid + s;
right = std::min(len, right);
if (mid >= right) { continue; }
if (mid >= right) {
continue;
}
std::copy(_First + left, _First + mid, buf + left);
std::merge(buf + left, buf + mid, _First + mid, _First + right, _First + left, _Pred);
}
Expand Down
8 changes: 6 additions & 2 deletions include/LightGBM/utils/openmp_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,13 @@ class ThreadExceptionHelper {
}
void CaptureException() {
// only catch first exception.
if (ex_ptr_ != nullptr) { return; }
if (ex_ptr_ != nullptr) {
return;
}
std::unique_lock<std::mutex> guard(lock_);
if (ex_ptr_ != nullptr) { return; }
if (ex_ptr_ != nullptr) {
return;
}
ex_ptr_ = std::current_exception();
}

Expand Down
8 changes: 6 additions & 2 deletions include/LightGBM/utils/text_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ class TextReader {
++i;
++total_cnt;
// skip end of line
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
++i;
}
last_i = i;
} else {
++i;
Expand Down Expand Up @@ -284,7 +286,9 @@ class TextReader {
++i;
++total_cnt;
// skip end of line
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) { ++i; }
while ((buffer_process[i] == '\n' || buffer_process[i] == '\r') && i < read_cnt) {
++i;
}
last_i = i;
} else {
++i;
Expand Down
8 changes: 6 additions & 2 deletions src/application/application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ void Application::LoadData() {
if (config_.is_provide_training_metric) {
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(train_data_->metadata(), train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
Expand Down Expand Up @@ -149,7 +151,9 @@ void Application::LoadData() {
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(valid_datas_.back()->metadata(),
valid_datas_.back()->num_data());
valid_metrics_.back().push_back(std::move(metric));
Expand Down
16 changes: 12 additions & 4 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ void GBDT::AddValidDataset(const Dataset* valid_data,

if (early_stopping_round_ > 0) {
auto num_metrics = valid_metrics.size();
if (es_first_metric_only_) { num_metrics = 1; }
if (es_first_metric_only_) {
num_metrics = 1;
}
best_iter_.emplace_back(num_metrics, 0);
best_score_.emplace_back(num_metrics, kMinScore);
best_msg_.emplace_back(num_metrics);
Expand Down Expand Up @@ -452,7 +454,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
}

void GBDT::RollbackOneIter() {
if (iter_ <= 0) { return; }
if (iter_ <= 0) {
return;
}
// reset score
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
auto curr_tree = models_.size() - num_tree_per_iteration_ + cur_tree_id;
Expand Down Expand Up @@ -588,15 +592,19 @@ std::string GBDT::OutputMetric(int iter) {
msg_buf << tmp_buf.str() << '\n';
}
}
if (es_first_metric_only_ && j > 0) { continue; }
if (es_first_metric_only_ && j > 0) {
continue;
}
if (ret.empty() && early_stopping_round_ > 0) {
auto cur_score = valid_metrics_[i][j]->factor_to_bigger_better() * test_scores.back();
if (cur_score - best_score_[i][j] > early_stopping_min_delta_) {
best_score_[i][j] = cur_score;
best_iter_[i][j] = iter;
meet_early_stopping_pairs.emplace_back(i, j);
} else {
if (iter - best_iter_[i][j] >= early_stopping_round_) { ret = best_msg_[i][j]; }
if (iter - best_iter_[i][j] >= early_stopping_round_) {
ret = best_msg_[i][j];
}
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/boosting/goss.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ class GOSSStrategy : public SampleStrategy {
void Bagging(int iter, TreeLearner* tree_learner, score_t* gradients, score_t* hessians) override {
bag_data_cnt_ = num_data_;
// not subsample for first iterations
if (iter < static_cast<int>(1.0f / config_->learning_rate)) { return; }
if (iter < static_cast<int>(1.0f / config_->learning_rate)) {
return;
}
auto left_cnt = bagging_runner_.Run<true>(
num_data_,
[=](int, data_size_t cur_start, data_size_t cur_cnt, data_size_t* left,
Expand Down
4 changes: 3 additions & 1 deletion src/boosting/rf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,9 @@ class RF : public GBDT {
}

void RollbackOneIter() override {
if (iter_ <= 0) { return; }
if (iter_ <= 0) {
return;
}
int cur_iter = iter_ + num_init_iteration_ - 1;
// reset score
for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) {
Expand Down
28 changes: 21 additions & 7 deletions src/c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,9 @@ class Booster {
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(
Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(train_data_->metadata(), train_data_->num_data());
train_metric_.push_back(std::move(metric));
}
Expand Down Expand Up @@ -394,7 +396,9 @@ class Booster {
valid_metrics_.emplace_back();
for (auto metric_type : config_.metric) {
auto metric = std::unique_ptr<Metric>(Metric::CreateMetric(metric_type, config_));
if (metric == nullptr) { continue; }
if (metric == nullptr) {
continue;
}
metric->Init(valid_data->metadata(), valid_data->num_data());
valid_metrics_.back().push_back(std::move(metric));
}
Expand Down Expand Up @@ -1603,7 +1607,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
OMP_LOOP_EX_BEGIN();
const int tid = omp_get_thread_num();
int feature_idx = ret->InnerFeatureIndex(i);
if (feature_idx < 0) { continue; }
if (feature_idx < 0) {
continue;
}
int group = ret->Feature2Group(feature_idx);
int sub_feature = ret->Feture2SubFeature(feature_idx);
CSC_RowIterator col_it(col_ptr, col_ptr_type, indices, data, data_type, ncol_ptr, nelem, i);
Expand All @@ -1614,7 +1620,9 @@ int LGBM_DatasetCreateFromCSC(const void* col_ptr,
auto pair = col_it.NextNonZero();
row_idx = pair.first;
// no more data
if (row_idx < 0) { break; }
if (row_idx < 0) {
break;
}
ret->PushOneData(tid, row_idx, group, feature_idx, sub_feature, pair.second);
}
} else {
Expand Down Expand Up @@ -1838,7 +1846,9 @@ int LGBM_DatasetSetField(DatasetHandle handle,
} else if (type == C_API_DTYPE_FLOAT64) {
is_success = dataset->SetDoubleField(field_name, reinterpret_cast<const double*>(field_data), static_cast<int32_t>(num_element));
}
if (!is_success) { Log::Fatal("Input data type error or field not found"); }
if (!is_success) {
Log::Fatal("Input data type error or field not found");
}
API_END();
}

Expand Down Expand Up @@ -1875,8 +1885,12 @@ int LGBM_DatasetGetField(DatasetHandle handle,
*out_type = C_API_DTYPE_FLOAT64;
is_success = true;
}
if (!is_success) { Log::Fatal("Field not found"); }
if (*out_ptr == nullptr) { *out_len = 0; }
if (!is_success) {
Log::Fatal("Field not found");
}
if (*out_ptr == nullptr) {
*out_len = 0;
}
API_END();
}

Expand Down
4 changes: 3 additions & 1 deletion src/io/bin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,9 @@ namespace LightGBM {
upper_bounds[bin_cnt] = distinct_values[i];
++bin_cnt;
lower_bounds[bin_cnt] = distinct_values[i + 1];
if (bin_cnt >= max_bin - 1) { break; }
if (bin_cnt >= max_bin - 1) {
break;
}
cur_cnt_inbin = 0;
if (!is_big_count_value[i]) {
--rest_bin_cnt;
Expand Down
Loading
Loading