|
24 | 24 | ) |
25 | 25 | from sklearnex import config_context |
26 | 26 | from sklearnex.tests.utils.spmd import ( |
27 | | - _assert_unordered_allclose, |
28 | 27 | _generate_classification_data, |
29 | 28 | _generate_regression_data, |
| 29 | + _generate_statistic_data, |
30 | 30 | _get_local_tensor, |
31 | 31 | _mpi_libs_and_gpu_available, |
32 | 32 | _spmd_assert_allclose, |
@@ -94,8 +94,8 @@ def test_knncls_spmd_gold(dataframe, queue): |
94 | 94 | spmd_result = spmd_model.predict(local_dpt_X_test) |
95 | 95 | batch_result = batch_model.predict(X_test) |
96 | 96 |
|
97 | | - _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) |
98 | | - _assert_unordered_allclose(spmd_dists, batch_dists, localize=True) |
| 97 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 98 | + _spmd_assert_allclose(spmd_dists, batch_dists) |
99 | 99 | _spmd_assert_allclose(spmd_result, batch_result) |
100 | 100 |
|
101 | 101 |
|
@@ -164,10 +164,8 @@ def test_knncls_spmd_synthetic( |
164 | 164 |
|
165 | 165 | tol = 1e-4 |
166 | 166 | if dtype == np.float64: |
167 | | - _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) |
168 | | - _assert_unordered_allclose( |
169 | | - spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol |
170 | | - ) |
| 167 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 168 | + _spmd_assert_allclose(spmd_dists, batch_dists, rtol=tol, atol=tol) |
171 | 169 | _spmd_assert_allclose(spmd_result, batch_result) |
172 | 170 |
|
173 | 171 |
|
@@ -231,8 +229,8 @@ def test_knnreg_spmd_gold(dataframe, queue): |
231 | 229 | spmd_result = spmd_model.predict(local_dpt_X_test) |
232 | 230 | batch_result = batch_model.predict(X_test) |
233 | 231 |
|
234 | | - _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) |
235 | | - _assert_unordered_allclose(spmd_dists, batch_dists, localize=True) |
| 232 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 233 | + _spmd_assert_allclose(spmd_dists, batch_dists) |
236 | 234 | _spmd_assert_allclose(spmd_result, batch_result) |
237 | 235 |
|
238 | 236 |
|
@@ -303,8 +301,133 @@ def test_knnreg_spmd_synthetic( |
303 | 301 |
|
304 | 302 | tol = 0.005 if dtype == np.float32 else 1e-4 |
305 | 303 | if dtype == np.float64: |
306 | | - _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) |
307 | | - _assert_unordered_allclose( |
308 | | - spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol |
309 | | - ) |
| 304 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 305 | + _spmd_assert_allclose(spmd_dists, batch_dists, rtol=tol, atol=tol) |
310 | 306 | _spmd_assert_allclose(spmd_result, batch_result, rtol=tol, atol=tol) |
| 307 | + |
| 308 | + |
| 309 | +@pytest.mark.skipif( |
| 310 | + not _mpi_libs_and_gpu_available, |
| 311 | + reason="GPU device and MPI libs required for test", |
| 312 | +) |
| 313 | +@pytest.mark.parametrize( |
| 314 | + "dataframe,queue", |
| 315 | + get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), |
| 316 | +) |
| 317 | +@pytest.mark.mpi |
| 318 | +def test_knnsearch_spmd_gold(dataframe, queue): |
| 319 | + # Import the SPMD and batch NearestNeighbors estimators under distinct aliases |
| 320 | + from sklearnex.neighbors import NearestNeighbors as NearestNeighbors_Batch |
| 321 | + from sklearnex.spmd.neighbors import NearestNeighbors as NearestNeighbors_SPMD |
| 322 | + |
| 323 | + # Create gold data and convert the _get_local_tensor() result to the target dataframe |
| 324 | + X_train = np.array( |
| 325 | + [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [10, 10], [9, 9]] |
| 326 | + ) |
| 327 | + local_dpt_X_train = _convert_to_dataframe( |
| 328 | + _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe |
| 329 | + ) |
| 330 | + |
| 331 | + # Ensure kneighbors() search results (distances, indices) of batch algo match spmd |
| 332 | + spmd_model = NearestNeighbors_SPMD(n_neighbors=2, algorithm="brute").fit( |
| 333 | + local_dpt_X_train |
| 334 | + ) |
| 335 | + batch_model = NearestNeighbors_Batch(n_neighbors=2, algorithm="brute").fit(X_train) |
| 336 | + spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_train) |
| 337 | + batch_dists, batch_indcs = batch_model.kneighbors(X_train) |
| 338 | + |
| 339 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 340 | + _spmd_assert_allclose(spmd_dists, batch_dists) |
| 341 | + |
| 342 | + |
| 343 | +@pytest.mark.skipif( |
| 344 | + not _mpi_libs_and_gpu_available, |
| 345 | + reason="GPU device and MPI libs required for test", |
| 346 | +) |
| 347 | +@pytest.mark.parametrize( |
| 348 | + "dimensions", [{"n": 100, "m": 10, "k": 2}, {"n": 100000, "m": 100, "k": 100}] |
| 349 | +) |
| 350 | +@pytest.mark.parametrize( |
| 351 | + "dataframe,queue", |
| 352 | + get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), |
| 353 | +) |
| 354 | +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) |
| 355 | +@pytest.mark.mpi |
| 356 | +def test_knnsearch_spmd_synthetic( |
| 357 | + dimensions, |
| 358 | + dataframe, |
| 359 | + queue, |
| 360 | + dtype, |
| 361 | +): |
| 362 | + if dimensions["n"] > 10000 and dtype == np.float32: |
| 363 | + pytest.skip("Skipping large float32 test due to expected precision issues") |
| 364 | + |
| 365 | + # Import the SPMD and batch NearestNeighbors estimators under distinct aliases |
| 366 | + from sklearnex.neighbors import NearestNeighbors as NearestNeighbors_Batch |
| 367 | + from sklearnex.spmd.neighbors import NearestNeighbors as NearestNeighbors_SPMD |
| 368 | + |
| 369 | + # Generate data from dimensions n and m (presumably an n x m array; confirm in _generate_statistic_data) and convert to dataframe |
| 370 | + X_train = _generate_statistic_data(dimensions["n"], dimensions["m"], dtype=dtype) |
| 371 | + |
| 372 | + local_dpt_X_train = _convert_to_dataframe( |
| 373 | + _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe |
| 374 | + ) |
| 375 | + |
| 376 | + # Ensure kneighbors() search results of batch algo match spmd (k neighbors, brute-force algorithm) |
| 377 | + spmd_model = NearestNeighbors_SPMD( |
| 378 | + n_neighbors=dimensions["k"], algorithm="brute" |
| 379 | + ).fit(local_dpt_X_train) |
| 380 | + batch_model = NearestNeighbors_Batch( |
| 381 | + n_neighbors=dimensions["k"], algorithm="brute" |
| 382 | + ).fit(X_train) |
| 383 | + spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_train) |
| 384 | + batch_dists, batch_indcs = batch_model.kneighbors(X_train) |
| 385 | + |
| 386 | + tol = 0.005 if dtype == np.float32 else 1e-6 |
| 387 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 388 | + _spmd_assert_allclose(spmd_dists, batch_dists, rtol=tol, atol=tol) |
| 389 | + |
| 390 | + |
| 391 | +@pytest.mark.skipif( |
| 392 | + not _mpi_libs_and_gpu_available, |
| 393 | + reason="GPU device and MPI libs required for test", |
| 394 | +) |
| 395 | +@pytest.mark.parametrize( |
| 396 | + "dataframe,queue", |
| 397 | + get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"), |
| 398 | +) |
| 399 | +@pytest.mark.mpi |
| 400 | +def test_knn_spmd_empty_kneighbors(dataframe, queue): |
| 401 | + # Import the batch NearestNeighbors reference and all three SPMD neighbors estimators |
| 402 | + from sklearnex.neighbors import NearestNeighbors as NearestNeighbors_Batch |
| 403 | + from sklearnex.spmd.neighbors import ( |
| 404 | + KNeighborsClassifier, |
| 405 | + KNeighborsRegressor, |
| 406 | + NearestNeighbors, |
| 407 | + ) |
| 408 | + |
| 409 | + # Create gold features and labels, then convert the _get_local_tensor() results to the target dataframe |
| 410 | + X_train = np.array( |
| 411 | + [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2], [10, 10], [9, 9]] |
| 412 | + ) |
| 413 | + y_train = np.array([0, 1, 0, 1, 0, 1, 0, 1]) |
| 414 | + local_dpt_X_train = _convert_to_dataframe( |
| 415 | + _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe |
| 416 | + ) |
| 417 | + local_dpt_y_train = _convert_to_dataframe( |
| 418 | + _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe |
| 419 | + ) |
| 420 | + |
| 421 | + # Call kneighbors() with no arguments on each SPMD estimator and compare against the batch NearestNeighbors result |
| 422 | + for CurrentEstimator in [KNeighborsClassifier, KNeighborsRegressor, NearestNeighbors]: |
| 423 | + spmd_model = CurrentEstimator(n_neighbors=1, algorithm="brute").fit( |
| 424 | + local_dpt_X_train, local_dpt_y_train |
| 425 | + ) |
| 426 | + batch_model = NearestNeighbors_Batch(n_neighbors=1, algorithm="brute").fit( |
| 427 | + X_train, y_train |
| 428 | + ) |
| 429 | + spmd_dists, spmd_indcs = spmd_model.kneighbors() |
| 430 | + batch_dists, batch_indcs = batch_model.kneighbors() |
| 431 | + |
| 432 | + _spmd_assert_allclose(spmd_indcs, batch_indcs) |
| 433 | + _spmd_assert_allclose(spmd_dists, batch_dists) |
0 commit comments