From e04f05444c479c00c00f87cd23eae2e6049ceb18 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Fri, 22 Nov 2024 15:31:55 -0800 Subject: [PATCH 01/10] Further revisions for green CI on fp32 GPUs --- onedal/linear_model/linear_model.py | 6 +++--- sklearnex/linear_model/tests/test_logreg.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py index cde64cd5ed..3f256ebebe 100755 --- a/onedal/linear_model/linear_model.py +++ b/onedal/linear_model/linear_model.py @@ -198,6 +198,9 @@ def fit(self, X, y, queue=None): if not isinstance(X, np.ndarray): X = np.asarray(X) + policy = self._get_policy(queue, X, y) + X, y = _convert_to_supported(policy, X, y) + dtype = get_dtype(X) if dtype not in [np.float32, np.float64]: dtype = np.float64 @@ -207,11 +210,8 @@ def fit(self, X, y, queue=None): X, y = _check_X_y(X, y, force_all_finite=False, accept_2d_y=True) - policy = self._get_policy(queue, X, y) - self.n_features_in_ = _num_features(X, fallback_1d=True) - X, y = _convert_to_supported(policy, X, y) params = self._get_onedal_params(get_dtype(X)) X_table, y_table = to_table(X, y) diff --git a/sklearnex/linear_model/tests/test_logreg.py b/sklearnex/linear_model/tests/test_logreg.py index 65c7ea5d0f..5657fb8165 100755 --- a/sklearnex/linear_model/tests/test_logreg.py +++ b/sklearnex/linear_model/tests/test_logreg.py @@ -127,7 +127,7 @@ def test_csr(queue, dtype, dims): pred_sp = model_sp.predict(X_sp) prob_sp = model_sp.predict_proba(X_sp) - rtol = 2e-4 + rtol = 1e-3 if dtype == np.float32 else 2e-4 assert_allclose(pred, pred_sp, rtol=rtol) assert_allclose(prob, prob_sp, rtol=rtol) assert_allclose(model.coef_, model_sp.coef_, rtol=rtol) From c43738cc71b178c48f60df39f1b16e0c0959a184 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 12:36:10 -0800 Subject: [PATCH 02/10] additional threshold updates and has_aspect checks --- onedal/linear_model/tests/test_linear_regression.py | 2 +- onedal/linear_model/tests/test_logistic_regression.py | 2 +- .../linear_model/tests/test_incremental_linear.py | 6 +++--- .../linear_model/tests/test_incremental_ridge.py | 11 ++++++++--- sklearnex/linear_model/tests/test_logreg.py | 2 +- .../decomposition/tests/test_incremental_pca.py | 8 ++++---- sklearnex/preview/linear_model/tests/test_ridge.py | 4 +++- .../tests/test_basic_statistics_spmd.py | 5 +++-- .../tests/test_incremental_basic_statistics_spmd.py | 2 +- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py | 2 +- .../spmd/covariance/tests/test_covariance_spmd.py | 2 +- .../decomposition/tests/test_incremental_pca_spmd.py | 4 ++-- sklearnex/spmd/decomposition/tests/test_pca_spmd.py | 2 +- .../tests/test_incremental_linear_spmd.py | 4 ++-- .../linear_model/tests/test_linear_regression_spmd.py | 2 +- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py | 6 +++--- 16 files changed, 36 insertions(+), 28 deletions(-) diff --git a/onedal/linear_model/tests/test_linear_regression.py b/onedal/linear_model/tests/test_linear_regression.py index 4653b07571..10403768b8 100755 --- a/onedal/linear_model/tests/test_linear_regression.py +++ b/onedal/linear_model/tests/test_linear_regression.py @@ -239,7 +239,7 @@ def test_multioutput_regression(queue, dtype, fit_intercept, problem_type): pred = model.predict(X, queue=queue) expected_pred = X @ model.coef_.T + model.intercept_.reshape((1, -1)) - tol = 1e-5 if pred.dtype == np.float32 else 1e-7 + tol = 2e-5 if pred.dtype == np.float32 else 1e-7 assert_allclose(pred, expected_pred, rtol=tol) # check that it also works when 'y' is a list of lists diff --git a/onedal/linear_model/tests/test_logistic_regression.py b/onedal/linear_model/tests/test_logistic_regression.py index 9866d5a7f2..af1fc40fab 100644 --- a/onedal/linear_model/tests/test_logistic_regression.py +++ b/onedal/linear_model/tests/test_logistic_regression.py @@ -89,7 +89,7 @@ def test_csr(queue, dtype, dims): model_sp.fit(X_sp, y, queue=queue) pred_sp = model_sp.predict(X_sp, queue=queue) - rtol = 2e-4 + rtol = 1e-3 assert_allclose(pred, pred_sp, rtol=rtol) assert_allclose(model.coef_, model_sp.coef_, rtol=rtol) assert_allclose(model.intercept_, model_sp.intercept_, rtol=rtol) diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py index af25373ef6..19b2eeabb8 100644 --- a/sklearnex/linear_model/tests/test_incremental_linear.py +++ b/sklearnex/linear_model/tests/test_incremental_linear.py @@ -49,7 +49,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block y_pred = inclin.predict(X_df) np_y_pred = _as_numpy(y_pred) - tol = 2e-6 if dtype == np.float32 else 1e-7 + tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7 assert_allclose(inclin.coef_, [1], atol=tol) if fit_intercept: assert_allclose(inclin.intercept_, [0], atol=tol) @@ -89,7 +89,7 @@ def test_sklearnex_partial_fit_on_gold_data( np_y_pred = _as_numpy(y_pred) assert inclin.n_features_in_ == 1 - tol = 1e-5 if dtype == np.float32 else 1e-7 + tol = 1e-5 if y_pred.dtype == np.float32 else 1e-7 assert_allclose(inclin.coef_, [[1]], atol=tol) if fit_intercept: assert_allclose(inclin.intercept_, 3, atol=tol) @@ -131,7 +131,7 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data( assert inclin.n_features_in_ == 2 tol = 1e-7 - if dtype == np.float32: + if y_pred.dtype == np.float32: tol = 7e-6 if _IS_INTEL else 2e-5 assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol) diff --git a/sklearnex/linear_model/tests/test_incremental_ridge.py b/sklearnex/linear_model/tests/test_incremental_ridge.py index adcd5349ed..66305695d9 100644 --- a/sklearnex/linear_model/tests/test_incremental_ridge.py +++ b/sklearnex/linear_model/tests/test_incremental_ridge.py @@ -76,10 +76,13 @@ def test_inc_ridge_fit_coefficients( coefficients_manual, intercept_manual = _compute_ridge_coefficients( X, y, alpha, fit_intercept ) + + tol = 2e-4 if inc_ridge.coef_.dtype == np.float32 else 1e-6 + if fit_intercept: - assert_allclose(inc_ridge.intercept_, intercept_manual, rtol=1e-6, atol=1e-6) + assert_allclose(inc_ridge.intercept_, intercept_manual, rtol=tol, atol=tol) - assert_allclose(inc_ridge.coef_, coefficients_manual, rtol=1e-6, atol=1e-6) + assert_allclose(inc_ridge.coef_, coefficients_manual, rtol=tol, atol=tol) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @pytest.mark.parametrize("batch_size", [2, 5]) @@ -149,5 +152,7 @@ def test_inc_ridge_predict_after_fit(dataframe, queue, fit_intercept): y_pred_manual = np.dot(X, coefficients_manual) if fit_intercept: y_pred_manual += intercept_manual + + tol = 1e-5 if inc_ridge.coef_.dtype == np.float32 else 1e-6 - assert_allclose(_as_numpy(y_pred), y_pred_manual, rtol=1e-6, atol=1e-6) + assert_allclose(_as_numpy(y_pred), y_pred_manual, rtol=tol, atol=tol) diff --git a/sklearnex/linear_model/tests/test_logreg.py b/sklearnex/linear_model/tests/test_logreg.py index 5657fb8165..fe0c1b10f3 100755 --- a/sklearnex/linear_model/tests/test_logreg.py +++ b/sklearnex/linear_model/tests/test_logreg.py @@ -127,7 +127,7 @@ def test_csr(queue, dtype, dims): pred_sp = model_sp.predict(X_sp) prob_sp = model_sp.predict_proba(X_sp) - rtol = 1e-3 if dtype == np.float32 else 2e-4 + rtol = 1e-3 if pred_sp.dtype == np.float32 else 2e-4 assert_allclose(pred, pred_sp, rtol=rtol) assert_allclose(prob, prob_sp, rtol=rtol) assert_allclose(model.coef_, model_sp.coef_, rtol=rtol) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index 67929bfac8..920dd19b19 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -200,7 +200,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, whiten, num_blocks X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, transformed_data.dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -217,7 +217,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, whiten, num_blocks, dtype) incpca.fit(X_df) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, transformed_data.dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -235,7 +235,7 @@ def test_sklearnex_fit_transform_on_gold_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.fit_transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, transformed_data.dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -263,4 +263,4 @@ def test_sklearnex_partial_fit_on_random_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca(incpca, dtype, whiten, X, transformed_data) + check_pca(incpca, transformed_data.dtype, whiten, X, transformed_data) diff --git a/sklearnex/preview/linear_model/tests/test_ridge.py b/sklearnex/preview/linear_model/tests/test_ridge.py index c4252b9e90..9ff9adb4df 100644 --- a/sklearnex/preview/linear_model/tests/test_ridge.py +++ b/sklearnex/preview/linear_model/tests/test_ridge.py @@ -64,8 +64,10 @@ def test_ridge_coefficients(dataframe, queue, sample_size, feature_size, alpha): inverse_term = numpy.linalg.inv(numpy.dot(X.T, X) + lambda_identity) xt_y = numpy.dot(X.T, y) coefficients_manual = numpy.dot(inverse_term, xt_y) + + tol = 1e-5 if ridge_reg.coef_.dtype == np.float32 else 1e-6 - assert_allclose(ridge_reg.coef_, coefficients_manual, rtol=1e-6, atol=1e-6) + assert_allclose(ridge_reg.coef_, coefficients_manual, rtol=tol, atol=tol) if daal_check_version((2024, "P", 600)): diff --git a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py index d2b0cc5704..f23df3eb36 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py @@ -65,8 +65,9 @@ def test_basic_stats_spmd_gold(dataframe, queue): spmd_result = BasicStatistics_SPMD().fit(local_dpt_data) batch_result = BasicStatistics_Batch().fit(data) + tol = 1e-7 if queue.sycl_device.has_aspect_fp64 else 1e-6 for option in options_and_tests: - assert_allclose(getattr(spmd_result, option), getattr(batch_result, option)) + assert_allclose(getattr(spmd_result, option), getattr(batch_result, option), rtol=tol) @pytest.mark.skipif( @@ -97,7 +98,7 @@ def test_basic_stats_spmd_synthetic(n_samples, n_features, dataframe, queue, dty spmd_result = BasicStatistics_SPMD().fit(local_dpt_data) batch_result = BasicStatistics_Batch().fit(data) - tol = 1e-5 if dtype == np.float32 else 1e-7 + tol = 1e-5 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 for option in options_and_tests: assert_allclose( getattr(spmd_result, option), diff --git a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py index e66836eb30..92b787899e 100644 --- a/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +++ b/sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py @@ -260,7 +260,7 @@ def test_incremental_basic_statistics_partial_fit_spmd_synthetic( IncrementalBasicStatistics as IncrementalBasicStatistics_SPMD, ) - tol = 2e-3 if dtype == np.float32 else 1e-7 + tol = 2e-3 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Create gold data and process into dpt data = _generate_statistic_data(n_samples, n_features, dtype=dtype) diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 1b68bbbd2c..6f54a930fb 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -145,7 +145,7 @@ def test_kmeans_spmd_synthetic( n_clusters=n_clusters, init=spmd_model_init.cluster_centers_, random_state=0 ).fit(X_train) - atol = 1e-5 if dtype == np.float32 else 1e-7 + atol = 1e-5 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 _assert_unordered_allclose( spmd_model.cluster_centers_, batch_model.cluster_centers_, atol=atol ) diff --git a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py index 1b97c831f5..e1e49f549c 100644 --- a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py @@ -102,6 +102,6 @@ def test_covariance_spmd_synthetic( ) batch_result = EmpiricalCovariance_Batch(assume_centered=assume_centered).fit(data) - atol = 1e-5 if dtype == np.float32 else 1e-7 + atol = 1e-5 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 assert_allclose(spmd_result.covariance_, batch_result.covariance_, atol=atol) assert_allclose(spmd_result.location_, batch_result.location_, atol=atol) diff --git a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py index 776cabfa7b..90045b9127 100644 --- a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py @@ -174,7 +174,7 @@ def test_incremental_pca_fit_spmd_random( from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD # Increased test dataset size requires a higher tol setting in comparison to other tests - tol = 7e-5 if dtype == np.float32 else 1e-7 + tol = 7e-5 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Create data and process into dpt X = _generate_statistic_data(num_samples, num_features, dtype) @@ -233,7 +233,7 @@ def test_incremental_pca_partial_fit_spmd_random( from sklearnex.preview.decomposition import IncrementalPCA from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD - tol = 3e-4 if dtype == np.float32 else 1e-7 + tol = 3e-4 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Create data and process into dpt X = _generate_statistic_data(num_samples, num_features, dtype) diff --git a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py index ba9682e0e9..a334599284 100644 --- a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py @@ -116,7 +116,7 @@ def test_pca_spmd_synthetic( spmd_result = PCA_SPMD(n_components=n_components, whiten=whiten).fit(local_dpt_data) batch_result = PCA_Batch(n_components=n_components, whiten=whiten).fit(data) - tol = 1e-3 if dtype == np.float32 else 1e-7 + tol = 1e-3 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 assert_allclose(spmd_result.mean_, batch_result.mean_, atol=tol) assert_allclose(spmd_result.components_, batch_result.components_, atol=tol, rtol=tol) assert_allclose(spmd_result.singular_values_, batch_result.singular_values_, atol=tol) diff --git a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py index 3228b52c54..c88d57975a 100644 --- a/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py @@ -206,7 +206,7 @@ def test_incremental_linear_regression_fit_spmd_random( IncrementalLinearRegression as IncrementalLinearRegression_SPMD, ) - tol = 2e-4 if dtype == np.float32 else 1e-7 + tol = 2e-4 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Generate random data and process into dpt X_train, X_test, y_train, _ = _generate_regression_data( @@ -277,7 +277,7 @@ def test_incremental_linear_regression_partial_fit_spmd_random( IncrementalLinearRegression as IncrementalLinearRegression_SPMD, ) - tol = 3e-4 if dtype == np.float32 else 1e-7 + tol = 3e-4 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Generate random data and process into dpt X_train, X_test, y_train, _ = _generate_regression_data( diff --git a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py index 0d50f5fef6..1d3baa8c63 100644 --- a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py @@ -134,7 +134,7 @@ def test_linear_spmd_synthetic(n_samples, n_features, dataframe, queue, dtype): spmd_model = LinearRegression_SPMD().fit(local_dpt_X_train, local_dpt_y_train) batch_model = LinearRegression_Batch().fit(X_train, y_train) - tol = 1e-3 if dtype == np.float32 else 1e-7 + tol = 1e-3 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 assert_allclose(spmd_model.coef_, batch_model.coef_, rtol=tol, atol=tol) assert_allclose(spmd_model.intercept_, batch_model.intercept_, rtol=tol, atol=tol) diff --git a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py index 24112a362e..0e906daed2 100644 --- a/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +++ b/sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py @@ -155,7 +155,7 @@ def test_knncls_spmd_synthetic( batch_result = batch_model.predict(X_test) tol = 1e-4 - if dtype == np.float64: + if dtype == np.float64 and queue.sycl_device.has_aspect_fp64: _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) _assert_unordered_allclose( spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol @@ -279,8 +279,8 @@ def test_knnreg_spmd_synthetic( spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) - tol = 0.005 if dtype == np.float32 else 1e-4 - if dtype == np.float64: + tol = 0.005 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-4 + if dtype == np.float64 and queue.sycl_device.has_aspect_fp64: _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True) _assert_unordered_allclose( spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol From 6c1ad69674b899cce9394bd6612d35b71ba87df6 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 13:48:00 -0800 Subject: [PATCH 03/10] ridge nump as np --- .../preview/linear_model/tests/test_ridge.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearnex/preview/linear_model/tests/test_ridge.py b/sklearnex/preview/linear_model/tests/test_ridge.py index 9ff9adb4df..6d18d51c88 100644 --- a/sklearnex/preview/linear_model/tests/test_ridge.py +++ b/sklearnex/preview/linear_model/tests/test_ridge.py @@ -14,7 +14,7 @@ # limitations under the License. # =============================================================================== -import numpy +import numpy as np import pytest from numpy.testing import assert_allclose from sklearn.exceptions import NotFittedError @@ -30,8 +30,8 @@ def test_sklearnex_import_ridge(dataframe, queue): from sklearnex.preview.linear_model import Ridge - X = numpy.array([[1, 1], [1, 2], [2, 2], [2, 3]]) - y = numpy.dot(X, numpy.array([1, 2])) + 3 + X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) + y = np.dot(X, np.array([1, 2])) + 3 X_c = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y_c = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) ridge_reg = Ridge(alpha=0.5).fit(X_c, y_c) @@ -52,18 +52,18 @@ def test_sklearnex_import_ridge(dataframe, queue): def test_ridge_coefficients(dataframe, queue, sample_size, feature_size, alpha): from sklearnex.preview.linear_model import Ridge - X = numpy.random.rand(sample_size, feature_size) - y = numpy.random.rand(sample_size) + X = np.random.rand(sample_size, feature_size) + y = np.random.rand(sample_size) X_c = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y_c = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) ridge_reg = Ridge(fit_intercept=False, alpha=alpha).fit(X_c, y_c) # computing the coefficients manually # using the normal equation formula: (X^T * X + lambda * I)^-1 * X^T * y - lambda_identity = alpha * numpy.eye(X.shape[1]) - inverse_term = numpy.linalg.inv(numpy.dot(X.T, X) + lambda_identity) - xt_y = numpy.dot(X.T, y) - coefficients_manual = numpy.dot(inverse_term, xt_y) + lambda_identity = alpha * np.eye(X.shape[1]) + inverse_term = np.linalg.inv(np.dot(X.T, X) + lambda_identity) + xt_y = np.dot(X.T, y) + coefficients_manual = np.dot(inverse_term, xt_y) tol = 1e-5 if ridge_reg.coef_.dtype == np.float32 else 1e-6 @@ -80,7 +80,7 @@ def test_ridge_score_before_fit(dataframe, queue): model = Ridge(fit_intercept=True, alpha=0.5) - X, y = numpy.random.rand(sample_count, feature_count), numpy.random.rand( + X, y = np.random.rand(sample_count, feature_count), np.random.rand( sample_count ) X_c = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) @@ -97,7 +97,7 @@ def test_ridge_predict_before_fit(dataframe, queue): model = Ridge(fit_intercept=True, alpha=0.5) - X = numpy.random.rand(sample_count, feature_count) + X = np.random.rand(sample_count, feature_count) X_c = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) with pytest.raises(NotFittedError): From 0e080fb439d324cfdbf9a64afff65385887e3fdf Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 14:23:10 -0800 Subject: [PATCH 04/10] minor threshold adjustments --- sklearnex/linear_model/tests/test_incremental_ridge.py | 4 +++- sklearnex/spmd/covariance/tests/test_covariance_spmd.py | 5 +++-- .../covariance/tests/test_incremental_covariance_spmd.py | 2 +- .../spmd/decomposition/tests/test_incremental_pca_spmd.py | 2 +- sklearnex/spmd/decomposition/tests/test_pca_spmd.py | 8 +++++--- .../linear_model/tests/test_linear_regression_spmd.py | 6 ++++-- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/sklearnex/linear_model/tests/test_incremental_ridge.py b/sklearnex/linear_model/tests/test_incremental_ridge.py index 66305695d9..5bb286bff1 100644 --- a/sklearnex/linear_model/tests/test_incremental_ridge.py +++ b/sklearnex/linear_model/tests/test_incremental_ridge.py @@ -109,8 +109,10 @@ def test_inc_ridge_partial_fit_coefficients(dataframe, queue, alpha, batch_size) inverse_term = np.linalg.inv(np.dot(X.T, X) + lambda_identity) xt_y = np.dot(X.T, y) coefficients_manual = np.dot(inverse_term, xt_y) + + tol = 5e-3 if inc_ridge.coef_.dtype == np.float32 else 1e-6 - assert_allclose(inc_ridge.coef_, coefficients_manual, rtol=1e-6, atol=1e-6) + assert_allclose(inc_ridge.coef_, coefficients_manual, rtol=tol, atol=tol) def test_inc_ridge_score_before_fit(): X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) diff --git a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py index e1e49f549c..43b258be72 100644 --- a/sklearnex/spmd/covariance/tests/test_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_covariance_spmd.py @@ -64,8 +64,9 @@ def test_covariance_spmd_gold(dataframe, queue): spmd_result = EmpiricalCovariance_SPMD().fit(local_dpt_data) batch_result = EmpiricalCovariance_Batch().fit(data) - assert_allclose(spmd_result.covariance_, batch_result.covariance_) - assert_allclose(spmd_result.location_, batch_result.location_) + atol = 1e-7 if queue.sycl_device.has_aspect_fp64 else 1e-5 + assert_allclose(spmd_result.covariance_, batch_result.covariance_, atol=atol) + assert_allclose(spmd_result.location_, batch_result.location_, atol=atol) @pytest.mark.skipif( diff --git a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py index 649d01e43d..a83ba9e0f2 100644 --- a/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +++ b/sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py @@ -178,7 +178,7 @@ def test_incremental_covariance_partial_fit_spmd_synthetic( inccov.fit(dpt_data) - tol = 1e-7 + tol = 1e-7 if queue.sycl_device.has_aspect_fp64 else 1e-6 assert_allclose(inccov_spmd.covariance_, inccov.covariance_, atol=tol) assert_allclose(inccov_spmd.location_, inccov.location_, atol=tol) diff --git a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py index 90045b9127..38c0e50c12 100644 --- a/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py @@ -174,7 +174,7 @@ def test_incremental_pca_fit_spmd_random( from sklearnex.spmd.decomposition import IncrementalPCA as IncrementalPCA_SPMD # Increased test dataset size requires a higher tol setting in comparison to other tests - tol = 7e-5 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 + tol = 3e-2 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 1e-7 # Create data and process into dpt X = _generate_statistic_data(num_samples, num_features, dtype) diff --git a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py index a334599284..c755745919 100644 --- a/sklearnex/spmd/decomposition/tests/test_pca_spmd.py +++ b/sklearnex/spmd/decomposition/tests/test_pca_spmd.py @@ -65,16 +65,18 @@ def test_pca_spmd_gold(dataframe, queue): spmd_result = PCA_SPMD(n_components=2).fit(local_dpt_data) batch_result = PCA_Batch(n_components=2).fit(data) + tol = 1e-7 if queue.sycl_device.has_aspect_fp64 else 1e-5 + assert_allclose(spmd_result.mean_, batch_result.mean_) assert_allclose(spmd_result.components_, batch_result.components_) - assert_allclose(spmd_result.singular_values_, batch_result.singular_values_) + assert_allclose(spmd_result.singular_values_, batch_result.singular_values_, rtol=tol) assert_allclose( spmd_result.noise_variance_, batch_result.noise_variance_, - atol=1e-7, + atol=tol, ) assert_allclose( - spmd_result.explained_variance_ratio_, batch_result.explained_variance_ratio_ + spmd_result.explained_variance_ratio_, batch_result.explained_variance_ratio_, rtol=tol ) diff --git a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py index 1d3baa8c63..8bbe78fa45 100644 --- a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py @@ -82,8 +82,10 @@ def test_linear_spmd_gold(dataframe, queue): spmd_model = LinearRegression_SPMD().fit(local_dpt_X_train, local_dpt_y_train) batch_model = LinearRegression_Batch().fit(X_train, y_train) - assert_allclose(spmd_model.coef_, batch_model.coef_) - assert_allclose(spmd_model.intercept_, batch_model.intercept_) + tol = 1e-7 if queue.sycl_device.has_aspect_fp64 else 1e-5 + + assert_allclose(spmd_model.coef_, batch_model.coef_, rtol=tol) + assert_allclose(spmd_model.intercept_, batch_model.intercept_, rtol=tol) # ensure predictions of batch algo match spmd spmd_result = spmd_model.predict(local_dpt_X_test) From 3f1fc5c286929443acf794bda9a34d83e1fa5bed Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 14:26:46 -0800 Subject: [PATCH 05/10] unifying cluster tests (non int) --- .../spmd/cluster/tests/test_dbscan_spmd.py | 2 +- .../spmd/cluster/tests/test_kmeans_spmd.py | 27 +++++++++---------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py index 5d0a531dc4..80272c2ca4 100644 --- a/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_dbscan_spmd.py @@ -43,7 +43,7 @@ def test_dbscan_spmd_gold(dataframe, queue): from sklearnex.cluster import DBSCAN as DBSCAN_Batch from sklearnex.spmd.cluster import DBSCAN as DBSCAN_SPMD - data = np.array([[1, 2], [2, 2], [2, 3], [8, 7], [8, 8], [25, 80]]) + data = np.array([[1., 2.], [2., 2.], [2., 3.], [8., 7.], [8., 8.], [25., 80.]]) local_dpt_data = _convert_to_dataframe( _get_local_tensor(data), sycl_queue=queue, target_df=dataframe diff --git a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py index 2d38a343c9..6913a7ad31 100644 --- a/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +++ b/sklearnex/spmd/cluster/tests/test_kmeans_spmd.py @@ -47,22 +47,21 @@ def test_kmeans_spmd_gold(dataframe, queue): X_train = np.array( [ - [1, 2], - [2, 2], - [2, 3], - [8, 7], - [8, 8], - [25, 80], - [5, 65], - [2, 8], - [1, 3], - [2, 2], - [1, 3], - [2, 2], + [1., 2.], + [2., 2.], + [2., 3.], + [8., 7.], + [8., 8.], + [25., 80.], + [5., 65.], + [2., 8.], + [1., 3.], + [2., 2.], + [1., 3.], + [2., 2.], ], - dtype=np.float64, ) - X_test = np.array([[0, 0], [12, 3], [2, 2], [7, 8]], dtype=np.float64) + X_test = np.array([[0., 0.], [12., 3.], [2., 2.], [7., 8.]]) local_dpt_X_train = _convert_to_dataframe( _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe From 981adeabbddb90aac52298e3061925e945e88b59 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 17:01:49 -0800 Subject: [PATCH 06/10] debugging for windows --- tests/run_examples.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/run_examples.py b/tests/run_examples.py index ba84c26968..134ff57d18 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -79,8 +79,10 @@ available_devices = ["cpu"] gpu_available = False +print("dpctl available: {}".format(dpctl_available)) if dpctl_available: import dpctl + print("dpctl had gpu devices: {}".format(dpctl.has_gpu_devices())) if dpctl.has_gpu_devices(): gpu_available = True From ed5c52852f69c0aa4e0c53e80f89ea5552ca58a0 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 17:13:12 -0800 Subject: [PATCH 07/10] minor threshold change --- onedal/linear_model/tests/test_logistic_regression.py | 2 +- .../spmd/linear_model/tests/test_linear_regression_spmd.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/linear_model/tests/test_logistic_regression.py b/onedal/linear_model/tests/test_logistic_regression.py index af1fc40fab..cd905bc11c 100644 --- a/onedal/linear_model/tests/test_logistic_regression.py +++ b/onedal/linear_model/tests/test_logistic_regression.py @@ -89,7 +89,7 @@ def test_csr(queue, dtype, dims): model_sp.fit(X_sp, y, queue=queue) pred_sp = model_sp.predict(X_sp, queue=queue) - rtol = 1e-3 + rtol = 2e-3 assert_allclose(pred, pred_sp, rtol=rtol) assert_allclose(model.coef_, model_sp.coef_, rtol=rtol) assert_allclose(model.intercept_, model_sp.intercept_, rtol=rtol) diff --git a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py index 8bbe78fa45..1af05c84c6 100644 --- a/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +++ b/sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py @@ -91,7 +91,7 @@ def test_linear_spmd_gold(dataframe, queue): spmd_result = spmd_model.predict(local_dpt_X_test) batch_result = batch_model.predict(X_test) - _spmd_assert_allclose(spmd_result, batch_result) + _spmd_assert_allclose(spmd_result, batch_result, rtol=tol) @pytest.mark.skipif( From e146568d89a5d5b9a46e8c01cc3e0cd4e296a486 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Mon, 25 Nov 2024 23:09:23 -0800 Subject: [PATCH 08/10] logreg threshold update and further windows example debug --- sklearnex/linear_model/tests/test_logreg.py | 2 +- tests/run_examples.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearnex/linear_model/tests/test_logreg.py b/sklearnex/linear_model/tests/test_logreg.py index fe0c1b10f3..1634bc3207 100755 --- a/sklearnex/linear_model/tests/test_logreg.py +++ b/sklearnex/linear_model/tests/test_logreg.py @@ -127,7 +127,7 @@ def test_csr(queue, dtype, dims): pred_sp = model_sp.predict(X_sp) prob_sp = model_sp.predict_proba(X_sp) - rtol = 1e-3 if pred_sp.dtype == np.float32 else 2e-4 + rtol = 1e-3 if (dtype == np.float32 or not queue.sycl_device.has_aspect_fp64) else 2e-4 assert_allclose(pred, pred_sp, rtol=rtol) assert_allclose(prob, prob_sp, rtol=rtol) assert_allclose(model.coef_, model_sp.coef_, rtol=rtol) diff --git a/tests/run_examples.py b/tests/run_examples.py index 134ff57d18..9de0ec6b86 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -79,7 +79,14 @@ available_devices = ["cpu"] gpu_available = False +import dpctl +import dpctl.tensor as dpt print("dpctl available: {}".format(dpctl_available)) +print("dpctl had gpu devices: {}".format(dpctl.has_gpu_devices())) + +if dpctl.has_gpu_devices(): + gpu_available = True + available_devices.append("gpu") if dpctl_available: import dpctl print("dpctl had gpu devices: {}".format(dpctl.has_gpu_devices())) From 175a3bc5784fe9a9c842baef3d905b648d529432 Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Tue, 26 Nov 2024 11:40:20 -0800 Subject: [PATCH 09/10] trying harder --- tests/run_examples.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/run_examples.py b/tests/run_examples.py index 9de0ec6b86..fa373c3c6f 100755 --- a/tests/run_examples.py +++ b/tests/run_examples.py @@ -79,11 +79,18 @@ available_devices = ["cpu"] gpu_available = False +import site +path_to_env = site.getsitepackages()[0] +path_to_libs = os.path.join(path_to_env, "Library", "bin") +try: + os.add_dll_directory(path_to_libs) +except FileNotFoundError: + print("FILENOTFOUNDERROR sklearnex") + import dpctl import dpctl.tensor as dpt print("dpctl available: {}".format(dpctl_available)) print("dpctl had gpu devices: {}".format(dpctl.has_gpu_devices())) - if dpctl.has_gpu_devices(): gpu_available = True available_devices.append("gpu") From 6a497d2f4eb704c15f0a7be9c1b39bd4acbc011b Mon Sep 17 00:00:00 2001 From: Ethan Glaser Date: Thu, 12 Dec 2024 16:02:52 -0800 Subject: [PATCH 10/10] add ridge revisions after rebase --- sklearnex/linear_model/tests/test_ridge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearnex/linear_model/tests/test_ridge.py b/sklearnex/linear_model/tests/test_ridge.py index 9b617a0ad0..430cad4d28 100644 --- a/sklearnex/linear_model/tests/test_ridge.py +++ b/sklearnex/linear_model/tests/test_ridge.py @@ -129,8 +129,10 @@ def test_ridge_coefficients( X, y, alpha, fit_intercept=fit_intercept ) - assert_allclose(ridge_reg.coef_, coefficients_manual, rtol=1e-6, atol=1e-6) - assert_allclose(ridge_reg.intercept_, intercept_manual, rtol=1e-6, atol=1e-6) + tol = 1e-5 if ridge_reg.coef_.dtype == np.float32 else 1e-6 + + assert_allclose(ridge_reg.coef_, coefficients_manual, rtol=tol, atol=tol) + assert_allclose(ridge_reg.intercept_, intercept_manual, rtol=tol, atol=tol) @pytest.mark.skipif(