Skip to content

Commit 3be1b8b

Browse files
authored
Add xgboost to benchmark utilities (#7350)
This PR adds xgboost regression and classification to cuML's benchmark utilities.

Authors:
- Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
- Simon Adorf (https://github.com/csadorf)

URL: #7350
1 parent 8882188 commit 3be1b8b

File tree

4 files changed

+64
-3
lines changed

4 files changed

+64
-3
lines changed

python/cuml/cuml/benchmark/algorithms.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@
7070
HDBSCAN = None
7171

7272

# Optional dependency: xgboost may be absent in minimal environments.
# Fall back to None so the benchmark registry below can still be built;
# the corresponding AlgorithmPair entries simply hold None estimators.
try:
    from xgboost import XGBClassifier, XGBRegressor
except ImportError:
    XGBClassifier = XGBRegressor = None
7380
class AlgorithmPair:
7481
"""
7582
Wraps a cuML algorithm and (optionally) a cpu-based algorithm
@@ -347,6 +354,28 @@ def all_algorithms():
347354
accepts_labels=True,
348355
accuracy_function=metrics.r2_score,
349356
),
357+
AlgorithmPair(
358+
XGBClassifier,
359+
XGBClassifier,
360+
shared_args={"tree_method": "hist", "n_estimators": 100},
361+
cpu_args={"n_jobs": -1},
362+
cuml_args={"device": "cuda"},
363+
name="xgboost-classification",
364+
accepts_labels=True,
365+
cpu_data_prep_hook=_labels_to_int_hook,
366+
cuml_data_prep_hook=_labels_to_int_hook,
367+
accuracy_function=metrics.accuracy_score,
368+
),
369+
AlgorithmPair(
370+
XGBRegressor,
371+
XGBRegressor,
372+
shared_args={"tree_method": "hist", "n_estimators": 100},
373+
cpu_args={"n_jobs": -1},
374+
cuml_args={"device": "cuda"},
375+
name="xgboost-regression",
376+
accepts_labels=True,
377+
accuracy_function=metrics.r2_score,
378+
),
350379
AlgorithmPair(
351380
sklearn.manifold.TSNE,
352381
cuml.manifold.TSNE,

python/cuml/cuml/benchmark/automated/bench_classification.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,12 @@ def bench_svc_linear(gpubenchmark, bench_step, classification): # noqa: F811
6868

6969
def bench_svc_rbf(gpubenchmark, bench_step, classification): # noqa: F811
7070
_benchmark_algo(gpubenchmark, "SVC-RBF", bench_step, classification)
71+
72+
73+
def bench_xgboost_classification(
    gpubenchmark, bench_step, classification  # noqa: F811
):
    """Benchmark the xgboost-classification algorithm pair on the
    shared classification dataset fixture."""
    # xgboost is an optional dependency: skip instead of erroring out.
    pytest.importorskip("xgboost")
    algo_name = "xgboost-classification"
    _benchmark_algo(gpubenchmark, algo_name, bench_step, classification)

python/cuml/cuml/benchmark/automated/bench_regression.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,12 @@ def bench_svr_rbf(gpubenchmark, bench_step, regression1): # noqa: F811
8585

8686
def bench_svr_linear(gpubenchmark, bench_step, regression2): # noqa: F811
8787
_benchmark_algo(gpubenchmark, "SVR-Linear", bench_step, regression2)
88+
89+
90+
def bench_xgboost_regression(
    gpubenchmark, bench_step, regression1  # noqa: F811
):
    """Benchmark the xgboost-regression algorithm pair on the
    shared regression dataset fixture."""
    # xgboost is an optional dependency: skip instead of erroring out.
    pytest.importorskip("xgboost")
    algo_name = "xgboost-regression"
    _benchmark_algo(gpubenchmark, algo_name, bench_step, regression1)

python/cuml/tests/test_benchmark.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,16 +182,30 @@ def predict(self, X):
182182
# skipping UMAP-Supervised due to issue
183183
# https://github.com/rapidsai/cuml/issues/4243
184184
@pytest.mark.parametrize(
    "algo_name",
    [
        "DBSCAN",
        "LogisticRegression",
        "ElasticNet",
        "FIL",
        "xgboost-classification",
        "xgboost-regression",
    ],
)
def test_real_algos_runner(algo_name):
    """Run each real algorithm pair end-to-end through the accuracy
    comparison runner on a small synthetic dataset."""
    pair = algorithms.algorithm_by_name(algo_name)

    # These pairs wrap xgboost models; skip when the library is absent.
    xgboost_backed = {"FIL", "xgboost-classification", "xgboost-regression"}
    if algo_name in xgboost_backed:
        pytest.importorskip("xgboost")

    # Use appropriate dataset for regression algorithms
    regression_algos = {"ElasticNet", "xgboost-regression"}
    dataset = (
        "regression" if algo_name in regression_algos else "classification"
    )

    runner = AccuracyComparisonRunner(
        [50], [5], dataset_name=dataset, test_fraction=0.20
    )
    results = runner.run(pair)[0]
    print(results)

0 commit comments

Comments
 (0)