Skip to content

Commit 014b825

Browse files
authored
Merge pull request #244 from GispoCoding/218-coda-transforms
218 coda transforms
2 parents 1eeeb80 + 6c14a1a commit 014b825

26 files changed

+2138
-3
lines changed

docs/transformations/coda/alr.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Additive logratio transform
2+
3+
::: eis_toolkit.transformations.coda.alr

docs/transformations/coda/clr.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Centered logratio transform
2+
3+
::: eis_toolkit.transformations.coda.clr

docs/transformations/coda/ilr.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Isometric logratio transform
2+
3+
::: eis_toolkit.transformations.coda.ilr

docs/transformations/coda/pairwise.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Pairwise logratio transform
2+
3+
::: eis_toolkit.transformations.coda.pairwise

docs/transformations/coda/plr.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Pivot logratio transform
2+
3+
::: eis_toolkit.transformations.coda.plr

eis_toolkit/exceptions.py

+8
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,11 @@ class InvalidModelException(Exception):
8888

8989
class InvalidDatasetException(Exception):
    """Exception error class raised when the dataset is null."""
91+
92+
93+
class NonNumericDataException(Exception):
    """Exception error class raised when the given data includes non-numeric values."""
95+
96+
97+
class InvalidCompositionException(Exception):
    """Exception error class raised when the data is not in a suitable form for compositional data transforms."""

eis_toolkit/transformations/coda/__init__.py

Whitespace-only changes.
+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from numbers import Number
2+
3+
import numpy as np
4+
import pandas as pd
5+
from beartype import beartype
6+
from beartype.typing import Optional, Sequence
7+
8+
from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException
9+
from eis_toolkit.utilities.aitchison_geometry import _closure
10+
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
11+
from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern
12+
13+
14+
@beartype
15+
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """
    Compute the natural log of the selected columns divided row-wise by the denominator column.

    Args:
        df: The dataframe holding the data.
        columns: Names of the columns used as numerators.
        denominator_column: Name of the column every numerator is divided by.

    Returns:
        A dataframe with one logratio column per entry in columns.
    """
    numerators = df[columns]
    denominator = df[denominator_column]
    # axis=0 aligns the denominator on the row index, so each row is divided by its own denominator value.
    return np.log(numerators.div(denominator, axis=0))
19+
20+
21+
@beartype
22+
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Perform an additive logratio transformation on the data.

    Args:
        df: A dataframe of compositional data.
        column: The name of the column to be used as the denominator column. If None, the last column
            of the dataframe is used.
        keep_denominator_column: Whether to include the denominator column in the result. If True, the returned
            dataframe retains its original shape.

    Returns:
        A new dataframe containing the ALR transformed data.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    check_in_simplex_sample_space(df)

    if column is None:
        # No denominator given: fall back to the last column of the dataframe.
        column = df.columns[-1]
    elif column not in df.columns:
        raise InvalidColumnException(f"The column {column} was not found in the dataframe.")

    columns = list(df.columns)

    if not keep_denominator_column and column in columns:
        columns.remove(column)

    return rename_columns_by_pattern(_alr_transform(df, columns, column))
53+
54+
55+
@beartype
56+
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate ALR transformed data and pass it through the closure operation.

    The input dataframe is not modified.

    Args:
        df: A dataframe of ALR transformed data.
        denominator_column: Name of the denominator column; it is appended if df does not contain it.
        scale: The scale value forwarded to the closure operation.

    Returns:
        The result of applying the closure operation to the exponentiated data.
    """
    if denominator_column in df.columns.values:
        log_ratios = df.copy()
    else:
        # The denominator's logratio with itself is log(1) = 0, so it re-enters as a column of zeros.
        log_ratios = df.assign(**{denominator_column: 0.0})

    exponentiated = np.exp(log_ratios)
    return _closure(exponentiated, scale)
64+
65+
66+
@beartype
67+
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Invert an additive logratio (ALR) transformation.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column.
        scale: The value to which each composition should be normalized. E.g., if the composition is expressed
            as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    return _inverse_alr(df=df, denominator_column=denominator_column, scale=scale)
+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from numbers import Number
2+
3+
import numpy as np
4+
import pandas as pd
5+
from beartype import beartype
6+
from beartype.typing import Optional, Sequence
7+
from scipy.stats import gmean
8+
9+
from eis_toolkit.exceptions import NumericValueSignException
10+
from eis_toolkit.utilities.aitchison_geometry import _closure
11+
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
12+
from eis_toolkit.utilities.miscellaneous import rename_columns, rename_columns_by_pattern
13+
14+
15+
@beartype
16+
def _centered_ratio(row: pd.Series) -> pd.Series:
    """
    Divide every component of a row by the geometric mean of that row.

    Args:
        row: A single composition.

    Returns:
        The row with each value divided by the row's geometric mean.
    """
    geometric_mean = gmean(row)
    return row.div(geometric_mean)
19+
20+
21+
@beartype
22+
def _clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Compute the centered logratio of every row.

    Each value is divided by the geometric mean of its row and the natural log of the result is taken.
    The computation is vectorized over the whole dataframe and does not modify the input.

    Args:
        df: A dataframe whose rows are compositions.

    Returns:
        A dataframe with the same index and columns as df containing the log of the centered ratios.
    """
    # One geometric mean per row: axis=1 reduces across the columns.
    row_gmeans = gmean(df.to_numpy(), axis=1)
    # axis=0 pairs the i-th geometric mean with the i-th row.
    centered = df.div(row_gmeans, axis=0)
    return np.log(centered)
28+
29+
30+
@beartype
31+
def clr_transform(df: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the centered logratio (CLR) transformation to compositional data.

    Args:
        df: A dataframe of compositional data.

    Returns:
        A new dataframe containing the CLR transformed data.

    Raises:
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    # Validate the input before transforming.
    check_in_simplex_sample_space(df)

    transformed = _clr_transform(df)
    return rename_columns_by_pattern(transformed)
47+
48+
49+
@beartype
50+
def _inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Exponentiate CLR transformed data, pass it through the closure operation and optionally rename columns.

    Args:
        df: A dataframe of CLR transformed data.
        colnames: Column names to rename the result's columns to. If None, no renaming is done.
        scale: The scale value forwarded to the closure operation.

    Returns:
        The closed, exponentiated data, renamed when colnames is given.
    """
    composition = _closure(np.exp(df), scale)
    return composition if colnames is None else rename_columns(composition, colnames)
57+
58+
59+
@beartype
60+
def inverse_clr(df: pd.DataFrame, colnames: Optional[Sequence[str]] = None, scale: Number = 1.0) -> pd.DataFrame:
    """
    Invert a centered logratio (CLR) transformation.

    Args:
        df: A dataframe of CLR transformed compositional data.
        colnames: List of column names to rename the columns to.
        scale: The value to which each composition should be normalized. E.g., if the composition is expressed
            as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    # Reject a non-positive scale before doing any work.
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    return _inverse_clr(df=df, colnames=colnames, scale=scale)
+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import numpy as np
2+
import pandas as pd
3+
from beartype import beartype
4+
from beartype.typing import Sequence
5+
from scipy.stats import gmean
6+
7+
from eis_toolkit.exceptions import InvalidColumnException, InvalidCompositionException, InvalidParameterValueException
8+
from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space
9+
from eis_toolkit.utilities.checks.dataframe import check_columns_valid
10+
from eis_toolkit.utilities.checks.parameter import check_lists_overlap, check_numeric_value_sign
11+
12+
13+
@beartype
14+
def _calculate_ilr_scaling_factor(c1: int, c2: int) -> np.float64:
    """
    Calculate the scaling factor for the ILR transform, sqrt(c1 * c2 / (c1 + c2)).

    Args:
        c1: The cardinality of the first subcomposition.
        c2: The cardinality of the second subcomposition.

    Returns:
        The scaling factor.

    Raises:
        InvalidParameterValueException: One or both of the input values are zero or negative.
    """
    both_positive = check_numeric_value_sign(c1) and check_numeric_value_sign(c2)
    if not both_positive:
        raise InvalidParameterValueException("Input values must both be positive integers.")

    product = c1 * c2
    # Casting the sum to np.float64 makes the division, and therefore the result, np.float64.
    total = np.float64(c1 + c2)
    return np.sqrt(product / total)
32+
33+
34+
@beartype
35+
def _geometric_mean_logratio(
    row: pd.Series, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str]
) -> np.float64:
    """
    Compute the log of the ratio between the geometric means of two groups of entries in a row.

    Args:
        row: A single composition, indexed by column name.
        subcomposition_1: Names of the entries forming the numerator group.
        subcomposition_2: Names of the entries forming the denominator group.

    Returns:
        log(gmean(numerator group) / gmean(denominator group)).
    """
    # Numerator group first, denominator group second.
    group_means = [gmean(row[group]) for group in (subcomposition_1, subcomposition_2)]
    return np.log(group_means[0] / group_means[1])
42+
43+
44+
@beartype
45+
def _single_ilr_transform(
    df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str]
) -> pd.Series:
    """
    Compute the ILR balance between two subcompositions for every row of the dataframe.

    The result is built directly on the index of df, so dataframes with a non-default index (for example
    string labels, or rows left over after filtering) get exactly one balance per row.

    Args:
        df: A dataframe of compositional data.
        subcomposition_1: Names of the columns in the numerator part of the ratio.
        subcomposition_2: Names of the columns in the denominator part of the ratio.

    Returns:
        A series of scaled logratios, one per row, indexed like df.

    Raises:
        InvalidParameterValueException: Propagated from the scaling factor calculation when a
            subcomposition has an invalid cardinality.
    """
    # Computing the scaling factor first also validates the cardinalities before any data is touched.
    scaling_factor = _calculate_ilr_scaling_factor(len(subcomposition_1), len(subcomposition_2))

    # Row-wise geometric means of each subcomposition; axis=1 reduces across the selected columns.
    numerator = gmean(df[list(subcomposition_1)].to_numpy(), axis=1)
    denominator = gmean(df[list(subcomposition_2)].to_numpy(), axis=1)

    balances = pd.Series(np.log(numerator / denominator), index=df.index)
    return scaling_factor * balances
63+
64+
65+
@beartype
66+
def single_ilr_transform(
    df: pd.DataFrame, subcomposition_1: Sequence[str], subcomposition_2: Sequence[str]
) -> pd.Series:
    """
    Perform a single isometric logratio transformation on the provided subcompositions.

    Returns ILR balances. Column order matters.

    Args:
        df: A dataframe of shape [N, D] of compositional data.
        subcomposition_1: Names of the columns in the numerator part of the ratio.
        subcomposition_2: Names of the columns in the denominator part of the ratio.

    Returns:
        A series of length N containing the transforms.

    Raises:
        InvalidColumnException: One or more subcomposition columns are not found in the input dataframe.
        InvalidCompositionException: Data is not normalized to the expected value or
            one or more columns are found in both subcompositions.
        InvalidParameterValueException: At least one subcomposition provided was empty.
        NumericValueSignException: Data contains zeros or negative values.
    """
    check_in_simplex_sample_space(df)

    if not subcomposition_1 or not subcomposition_2:
        raise InvalidParameterValueException("A subcomposition should contain at least one column.")

    all_columns_found = check_columns_valid(df, subcomposition_1) and check_columns_valid(df, subcomposition_2)
    if not all_columns_found:
        raise InvalidColumnException("Not all of the input columns were found in the input dataframe.")

    # A column may belong to only one side of the balance.
    if check_lists_overlap(subcomposition_1, subcomposition_2):
        raise InvalidCompositionException("The subcompositions overlap.")

    return _single_ilr_transform(df, subcomposition_1, subcomposition_2)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from numbers import Number
2+
3+
import numpy as np
4+
import pandas as pd
5+
from beartype import beartype
6+
7+
from eis_toolkit.exceptions import InvalidColumnException, InvalidParameterValueException
8+
from eis_toolkit.utilities.checks.dataframe import check_dataframe_contains_zeros
9+
10+
11+
@beartype
12+
def _single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64:
    """
    Return the natural logarithm of numerator divided by denominator.

    Args:
        numerator: The numerator in the ratio.
        denominator: The denominator in the ratio.

    Returns:
        log(numerator / denominator).
    """
    # The denominator is cast to float before dividing.
    ratio = numerator / float(denominator)
    return np.log(ratio)
15+
16+
17+
@beartype
18+
def single_pairwise_logratio(numerator: Number, denominator: Number) -> np.float64:
    """
    Perform a pairwise logratio transformation on the given values.

    Args:
        numerator: The numerator in the ratio.
        denominator: The denominator in the ratio.

    Returns:
        The transformed value.

    Raises:
        InvalidParameterValueException: One or both input values are zero.
    """
    # Reject zero in either position before forming the ratio.
    if any(value == 0 for value in (numerator, denominator)):
        raise InvalidParameterValueException("Input values cannot be zero.")

    return _single_pairwise_logratio(numerator, denominator)
36+
37+
38+
@beartype
39+
def _pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series:
    """
    Compute the logratio of two columns for every row of the dataframe.

    The result is built directly on the index of df, so dataframes with a non-default index (for example
    string labels, or rows left over after filtering) get exactly one value per row.

    Args:
        df: The dataframe containing the columns to use in the transformation.
        numerator_column: The name of the column to use as the numerator column.
        denominator_column: The name of the column to use as the denominator.

    Returns:
        A series of log(numerator / denominator) values, indexed like df.

    Raises:
        InvalidParameterValueException: Either column contains at least one zero value.
    """
    numerators = df[numerator_column].to_numpy(dtype=float)
    denominators = df[denominator_column].to_numpy(dtype=float)

    # Same zero guard and message that the per-value transform applies to each pair of inputs.
    if np.any(numerators == 0) or np.any(denominators == 0):
        raise InvalidParameterValueException("Input values cannot be zero.")

    return pd.Series(np.log(numerators / denominators), index=df.index)
48+
49+
50+
@beartype
51+
def pairwise_logratio(df: pd.DataFrame, numerator_column: str, denominator_column: str) -> pd.Series:
    """
    Perform a pairwise logratio transformation on the given columns.

    Args:
        df: The dataframe containing the columns to use in the transformation.
        numerator_column: The name of the column to use as the numerator column.
        denominator_column: The name of the column to use as the denominator.

    Returns:
        A series containing the transformed values.

    Raises:
        InvalidColumnException: One or both of the input columns are not found in the dataframe.
        InvalidParameterValueException: The input columns contain at least one zero value.
    """
    requested_columns = (numerator_column, denominator_column)
    if any(name not in df.columns for name in requested_columns):
        raise InvalidColumnException("At least one input column is not found in the dataframe.")

    # Only the two columns taking part in the ratio are checked for zeros.
    selected = df[[numerator_column, denominator_column]]
    if check_dataframe_contains_zeros(selected):
        raise InvalidParameterValueException("The input columns contain at least one zero value.")

    return _pairwise_logratio(df, numerator_column, denominator_column)

0 commit comments

Comments
 (0)