Skip to content

Commit 3f80048

Browse files
jtlaitnmaarnio
authored and committed
reverted back to include checks for simplex space. Changed closure check to closure of 1 or 100
1 parent 3722316 commit 3f80048

File tree

4 files changed

+27
-44
lines changed

4 files changed

+27
-44
lines changed
+14-11
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,30 @@
1-
import numpy as np
1+
from numbers import Number
2+
23
import pandas as pd
34
from beartype import beartype
4-
from beartype.typing import Optional
55

66
from eis_toolkit.exceptions import InvalidCompositionException, NumericValueSignException
77
from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_only_positive_numbers
88

99

1010
@beartype
11-
def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.float64] = None) -> None:
11+
def check_in_simplex_sample_space(df: pd.DataFrame, tolerance: Number = 0.0001) -> None:
1212
"""
1313
Check that the compositions represented by the data rows belong to a simplex sample space.
1414
15+
Checks that data has no NaN values.
1516
Checks that each compositional data point belongs to the set of positive real numbers.
16-
Checks that each composition is normalized to the same value.
17+
Checks that the rows of the input dataframe all sum to 1 or all sum to 100, within the given tolerance.
1718
1819
Args:
1920
df: The dataframe to check.
20-
expected_sum: The expected sum of each row. If None, simply checks that the sum of each row is equal.
21+
tolerance: Small tolerance value to allow floating-point imprecision.
2122
2223
Returns:
23-
True if values are valid and the sum of each row is the expected_sum.
24+
None.
2425
2526
Raises:
26-
InvalidCompositionException: Data is not normalized to the expected value.
27+
InvalidCompositionException: Data is not within the expected simplex sample space.
2728
NumericValueSignException: Data contains zeros or negative values.
2829
"""
2930
if df.isnull().values.any():
@@ -32,9 +33,11 @@ def check_in_simplex_sample_space(df: pd.DataFrame, expected_sum: Optional[np.fl
3233
if not check_dataframe_contains_only_positive_numbers(df):
3334
raise NumericValueSignException("Data contains zeros or negative values.")
3435

35-
df_sum = np.sum(df, axis=1)
36-
expected_sum = expected_sum if expected_sum is not None else df_sum.iloc[0]
37-
if len(df_sum[df_sum.iloc[:] != expected_sum]) != 0:
38-
raise InvalidCompositionException("Not each composition is normalized to the same value.")
36+
row_sums = df.sum(axis=1)
37+
closed_to_one = (row_sums - 1).abs() < tolerance
38+
closed_to_hundred = (row_sums - 100).abs() < tolerance
39+
40+
if not closed_to_one.all() and not closed_to_hundred.all():
41+
raise InvalidCompositionException(f"Input data is not closed to 1 or 100 within tolerance of {tolerance}.")
3942

4043
return None

tests/transformations/coda/alr_test.py

+5-16
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,23 @@
99
SAMPLE_DATAFRAME = pd.DataFrame(sample_array, columns=["a", "b", "c", "d"])
1010

1111

12-
def test_alr_transform_simple():
13-
"""Test ALR transformation core functionality."""
14-
ones_df_4x4 = pd.DataFrame(np.ones((4, 4)), columns=["a", "b", "c", "d"])
15-
zeros_df_4x4 = pd.DataFrame(np.zeros((4, 3)), columns=["V1", "V2", "V3"])
16-
result = alr_transform(ones_df_4x4)
17-
pd.testing.assert_frame_equal(result, zeros_df_4x4)
18-
19-
2012
def test_alr_transform():
2113
"""Test ALR transformation core functionality."""
22-
arr = np.array([[1, 4, 1, 1], [2, 1, 2, 2]])
14+
arr = np.random.dirichlet(np.ones(4), size=4)
2315
df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64)
2416

2517
result = alr_transform(df, column="b", keep_denominator_column=True)
2618
expected = pd.DataFrame(
27-
{
28-
"V1": [np.log(0.25), np.log(2)],
29-
"V2": [0, 0],
30-
"V3": [np.log(0.25), np.log(2)],
31-
"V4": [np.log(0.25), np.log(2)],
32-
},
19+
np.log(arr / arr[:, 1, None]),
20+
columns=["V1", "V2", "V3", "V4"],
3321
dtype=np.float64,
3422
)
3523
pd.testing.assert_frame_equal(result, expected)
3624

3725
result = alr_transform(df, column="b")
3826
expected = pd.DataFrame(
39-
{"V1": [np.log(0.25), np.log(2)], "V2": [np.log(0.25), np.log(2)], "V3": [np.log(0.25), np.log(2)]},
27+
np.log(np.delete(arr, 1, axis=1) / arr[:, 1, None]),
28+
columns=["V1", "V2", "V3"],
4029
dtype=np.float64,
4130
)
4231
pd.testing.assert_frame_equal(result, expected)

tests/transformations/coda/clr_test.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,16 @@
99
SAMPLE_DATAFRAME = pd.DataFrame(sample_array, columns=["a", "b", "c", "d"])
1010

1111

12-
def test_clr_transform_simple():
13-
"""Test CLR transform core functionality."""
14-
ones_df_4x4 = pd.DataFrame(np.ones((4, 4)), columns=["a", "b", "c", "d"])
15-
zeros_df_4x4 = pd.DataFrame(np.zeros((4, 4)), columns=["V1", "V2", "V3", "V4"])
16-
result = clr_transform(ones_df_4x4)
17-
pd.testing.assert_frame_equal(result, zeros_df_4x4)
18-
19-
2012
def test_clr_transform():
2113
"""Test CLR transform core functionality."""
22-
result = clr_transform(SAMPLE_DATAFRAME)
14+
arr = np.random.dirichlet(np.ones(4), size=4)
15+
df = pd.DataFrame(arr, columns=["a", "b", "c", "d"], dtype=np.float64)
16+
result = clr_transform(df)
17+
geometric_means = np.prod(arr, axis=1) ** (1 / arr.shape[1])
2318
expected = pd.DataFrame(
24-
{"V1": [1.38, 1.29], "V2": [-0.30, -0.08], "V3": [0.10, -0.15], "V4": [-1.18, -1.06]}, dtype=np.float64
19+
np.log(arr / geometric_means[:, None]),
20+
columns=["V1", "V2", "V3", "V4"],
21+
dtype=np.float64,
2522
)
2623
pd.testing.assert_frame_equal(result, expected, atol=1e-2)
2724

tests/utilities/compositional_test.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ def test_compositional_data_invalid():
7777
def test_check_for_simplex_sample_space():
7878
"""Test whether or not a dataframe belongs to a simplex sample space is correctly identified."""
7979
unit_simplex_df = pd.DataFrame([[0.1, 0.2, 0.3, 0.4], [0.2, 0.3, 0.2, 0.3]])
80-
simplex_df = pd.DataFrame([[1, 2, 3, 4], [2, 3, 2, 3]], columns=["a", "b", "c", "d"])
8180
non_simplex_positive_df = pd.DataFrame([1, 2, 3, 4], [5, 6, 7, 8])
8281
non_positive_df = pd.DataFrame([-1, 2, 3, 4], [1, 2, 3, 4])
8382

@@ -87,13 +86,8 @@ def test_check_for_simplex_sample_space():
8786
with pytest.raises(NumericValueSignException):
8887
check_in_simplex_sample_space(non_positive_df)
8988

90-
with pytest.raises(InvalidCompositionException):
91-
check_in_simplex_sample_space(simplex_df, np.float64(100))
92-
9389
# Valid cases - assert no exception is raised
9490
try:
95-
check_in_simplex_sample_space(simplex_df)
96-
check_in_simplex_sample_space(simplex_df, np.float64(10))
97-
check_in_simplex_sample_space(unit_simplex_df, np.float64(1.0))
91+
check_in_simplex_sample_space(unit_simplex_df)
9892
except Exception as ex:
9993
assert False, f"{type(ex)}: {ex}"

0 commit comments

Comments
 (0)