Skip to content

Commit 1f908d8

Browse files
committed
test(exploratory-analyses): update PCA tests
1 parent a6423ee commit 1f908d8

File tree

1 file changed

+62
-20
lines changed

1 file changed

+62
-20
lines changed

tests/exploratory_analyses/pca_test.py

+62 -20
Original file line number | Diff line number | Diff line change
@@ -19,35 +19,47 @@
1919
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
2020
def test_pca_numpy_array():
2121
"""Test that PCA function gives correct output for Numpy array input."""
22-
pca_array, explained_variances = compute_pca(DATA, 2)
22+
pca_array, principal_components, explained_variances, explained_variance_ratios = compute_pca(DATA, 2)
2323

24-
expected_pca_values = np.array([[-1.73205081, 1.11022302e-16], [0.0, 0.0], [1.73205081, 1.11022302e-16]])
25-
expected_explained_variances_values = [1.0, 4.10865055e-33]
24+
expected_pca_array_values = np.array([[-1.73205081, 1.11022302e-16], [0.0, 0.0], [1.73205081, 1.11022302e-16]])
25+
expected_component_values = np.array([[0.70711, 0.70711], [0.70711, -0.70711]])
26+
expected_explained_variance_ratios_values = [1.0, 4.10865055e-33]
2627

28+
np.testing.assert_equal(principal_components.size, 4)
2729
np.testing.assert_equal(explained_variances.size, 2)
30+
np.testing.assert_equal(explained_variance_ratios.size, 2)
2831
np.testing.assert_equal(pca_array.shape, DATA.shape)
2932

30-
np.testing.assert_array_almost_equal(pca_array, expected_pca_values, decimal=5)
31-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=5)
33+
np.testing.assert_array_almost_equal(pca_array, expected_pca_array_values, decimal=5)
34+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=5)
35+
np.testing.assert_array_almost_equal(
36+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=5
37+
)
3238

3339

3440
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
3541
def test_pca_df():
3642
"""Test that PCA function gives correct output for DF input."""
3743
data_df = pd.DataFrame(data=DATA, columns=["A", "B"])
3844

39-
pca_df, explained_variances = compute_pca(data_df, 2)
45+
pca_df, principal_components, explained_variances, explained_variance_ratios = compute_pca(data_df, 2)
4046

4147
expected_columns = ["principal_component_1", "principal_component_2"]
4248
expected_pca_values = np.array([[-1.73205081, 1.11022302e-16], [0.0, 0.0], [1.73205081, 1.11022302e-16]])
43-
expected_explained_variances_values = [1.0, 4.10865055e-33]
49+
expected_component_values = np.array([[0.70711, 0.70711], [0.70711, -0.70711]])
50+
expected_explained_variance_ratios_values = [1.0, 4.10865055e-33]
4451

52+
np.testing.assert_equal(principal_components.size, 4)
4553
np.testing.assert_equal(explained_variances.size, 2)
54+
np.testing.assert_equal(explained_variance_ratios.size, 2)
4655
np.testing.assert_equal(list(pca_df.columns), expected_columns)
4756
np.testing.assert_equal(pca_df.shape, data_df.shape)
4857

4958
np.testing.assert_array_almost_equal(pca_df.values, expected_pca_values, decimal=5)
50-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=5)
59+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=5)
60+
np.testing.assert_array_almost_equal(
61+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=5
62+
)
5163

5264

5365
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
@@ -57,66 +69,96 @@ def test_pca_gdf():
5769
data=DATA, columns=["A", "B"], geometry=[Point(1, 2), Point(2, 1), Point(3, 3)], crs="EPSG:4326"
5870
)
5971

60-
pca_gdf, explained_variances = compute_pca(data_gdf, 2)
72+
pca_gdf, principal_components, explained_variances, explained_variance_ratios = compute_pca(data_gdf, 2)
6173

6274
expected_columns = ["principal_component_1", "principal_component_2", "geometry"]
6375
expected_pca_values = np.array([[-1.73205081, 1.11022302e-16], [0.0, 0.0], [1.73205081, 1.11022302e-16]])
64-
expected_explained_variances_values = [1.0, 4.10865055e-33]
76+
expected_component_values = np.array([[0.70711, 0.70711], [0.70711, -0.70711]])
77+
expected_explained_variance_ratios_values = [1.0, 4.10865055e-33]
6578

79+
np.testing.assert_equal(principal_components.size, 4)
6680
np.testing.assert_equal(explained_variances.size, 2)
81+
np.testing.assert_equal(explained_variance_ratios.size, 2)
6782
np.testing.assert_equal(list(pca_gdf.columns), expected_columns)
6883
np.testing.assert_equal(pca_gdf.shape, data_gdf.shape)
6984

7085
np.testing.assert_array_almost_equal(pca_gdf.drop(columns=["geometry"]).values, expected_pca_values, decimal=5)
71-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=5)
86+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=5)
87+
np.testing.assert_array_almost_equal(
88+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=5
89+
)
7290

7391

7492
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
7593
def test_pca_with_nan_removal():
7694
"""Test that PCA function gives correct output for Numpy array input that has NaN values and remove strategy."""
7795
data = np.array([[1, 1], [2, np.nan], [3, 3]])
78-
pca_array, explained_variances = compute_pca(data, 2, nodata_handling="remove")
96+
pca_array, principal_components, explained_variances, explained_variance_ratios = compute_pca(
97+
data, 2, nodata_handling="remove"
98+
)
7999

80100
expected_pca_values = np.array([[-1.414, 0.0], [np.nan, np.nan], [1.414, 0.0]])
81-
expected_explained_variances_values = [1.0, 0.0]
101+
expected_component_values = np.array([[0.70711, 0.70711], [-0.70711, 0.70711]])
102+
expected_explained_variance_ratios_values = [1.0, 0.0]
82103

104+
np.testing.assert_equal(principal_components.size, 4)
83105
np.testing.assert_equal(explained_variances.size, 2)
106+
np.testing.assert_equal(explained_variance_ratios.size, 2)
84107
np.testing.assert_equal(pca_array.shape, DATA.shape)
85108

86109
np.testing.assert_array_almost_equal(pca_array, expected_pca_values, decimal=3)
87-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=3)
110+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=3)
111+
np.testing.assert_array_almost_equal(
112+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=3
113+
)
88114

89115

90116
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
91117
def test_pca_with_nan_replace():
92118
"""Test that PCA function gives correct output for Numpy array input that has NaN values and replace strategy."""
93119
data = np.array([[1, 1], [2, np.nan], [3, 3]])
94-
pca_array, explained_variances = compute_pca(data, 2, nodata_handling="replace")
120+
pca_array, principal_components, explained_variances, explained_variance_ratios = compute_pca(
121+
data, 2, nodata_handling="replace"
122+
)
95123

96124
expected_pca_values = np.array([[-1.73205, 1.11022e-16], [0, 0], [1.73205, 1.11022e-16]])
97-
expected_explained_variances_values = [1.0, 4.10865e-33]
125+
expected_component_values = np.array([[0.707, 0.707], [0.707, -0.707]])
126+
expected_explained_variance_ratios_values = [1.0, 4.10865e-33]
98127

128+
np.testing.assert_equal(principal_components.size, 4)
99129
np.testing.assert_equal(explained_variances.size, 2)
130+
np.testing.assert_equal(explained_variance_ratios.size, 2)
100131
np.testing.assert_equal(pca_array.shape, DATA.shape)
101132

102133
np.testing.assert_array_almost_equal(pca_array, expected_pca_values, decimal=3)
103-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=3)
134+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=3)
135+
np.testing.assert_array_almost_equal(
136+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=3
137+
)
104138

105139

106140
@pytest.mark.xfail(sys.platform == "win32", reason="Results deviate on Windows.", raises=AssertionError)
107141
def test_pca_with_nodata_removal():
108142
"""Test that PCA function gives correct output for input that has specified nodata values and removal strategy."""
109143
data = np.array([[1, 1], [2, -9999], [3, 3]])
110-
pca_array, explained_variances = compute_pca(data, 2, nodata_handling="remove", nodata=-9999)
144+
pca_array, principal_components, explained_variances, explained_variance_ratios = compute_pca(
145+
data, 2, nodata_handling="remove", nodata=-9999
146+
)
111147

112148
expected_pca_values = np.array([[-1.414, 0.0], [np.nan, np.nan], [1.414, 0.0]])
113-
expected_explained_variances_values = [1.0, 0.0]
149+
expected_component_values = np.array([[0.707, 0.707], [-0.707, 0.707]])
150+
expected_explained_variance_ratios_values = [1.0, 0.0]
114151

152+
np.testing.assert_equal(principal_components.size, 4)
115153
np.testing.assert_equal(explained_variances.size, 2)
154+
np.testing.assert_equal(explained_variance_ratios.size, 2)
116155
np.testing.assert_equal(pca_array.shape, DATA.shape)
117156

118157
np.testing.assert_array_almost_equal(pca_array, expected_pca_values, decimal=3)
119-
np.testing.assert_array_almost_equal(explained_variances, expected_explained_variances_values, decimal=3)
158+
np.testing.assert_array_almost_equal(principal_components, expected_component_values, decimal=3)
159+
np.testing.assert_array_almost_equal(
160+
explained_variance_ratios, expected_explained_variance_ratios_values, decimal=3
161+
)
120162

121163

122164
def test_pca_empty_data():

0 commit comments

Comments
 (0)