|
1 |
| -from numbers import Number |
2 |
| - |
3 |
| -import numpy as np |
4 |
| -import pandas as pd |
5 |
| -from beartype import beartype |
6 |
| -from beartype.typing import Optional, Sequence |
7 |
| - |
8 |
| -from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException |
9 |
| -from eis_toolkit.utilities.aitchison_geometry import _closure |
10 |
| -from eis_toolkit.utilities.checks.compositional import check_compositional_data |
11 |
| -from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern |
12 |
| - |
13 |
| - |
14 |
@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """Return the natural log of each selected column divided row-wise by the denominator column."""
    denominator = df[denominator_column]
    numerators = df[columns]
    # axis=0 aligns the denominator with the row index, so each row is divided by its own denominator value.
    return np.log(numerators.div(denominator, axis=0))
19 |
| - |
20 |
| - |
21 |
@beartype
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Apply the additive logratio (ALR) transformation to compositional data.

    Each selected column is divided by the denominator column and the natural logarithm of the ratio is taken.

    Args:
        df: A dataframe of compositional data.
        column: The name of the denominator column. If not given, the last column of the dataframe is used.
        keep_denominator_column: Whether the denominator column is also transformed and kept in the result.
            If True, the returned dataframe retains the original shape.

    Returns:
        A new dataframe containing the ALR transformed data, passed through `rename_columns_by_pattern`.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    check_compositional_data(df)

    if column is not None:
        if column not in df.columns:
            raise InvalidColumnException(f"The column {column} was not found in the dataframe.")
        denominator = column
    else:
        # No explicit choice: fall back to the last column.
        denominator = df.columns[-1]

    selected = list(df.columns)
    if not keep_denominator_column and denominator in selected:
        selected.remove(denominator)

    transformed = _alr_transform(df, selected, denominator)
    return rename_columns_by_pattern(transformed)
55 |
| - |
56 |
| - |
57 |
@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """Exponentiate a copy of the ALR data and hand it to `_closure` together with `scale`."""
    working = df.copy()

    # The ALR coordinate of the denominator itself is log(x / x) = 0, so a missing
    # denominator column is restored as zeros before exponentiating.
    if denominator_column not in working.columns.values:
        working[denominator_column] = 0.0

    exponentiated = np.exp(working)
    return _closure(exponentiated, scale)
66 |
| - |
67 |
| - |
68 |
@beartype
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Invert the ALR transformation for a set of ALR transformed data.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column. It is added to the data if it is missing.
        scale: The value to which each composition should be normalized. E.g., for compositions
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    return _inverse_alr(df, denominator_column=denominator_column, scale=scale)
| 1 | +from numbers import Number |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +from beartype import beartype |
| 6 | +from beartype.typing import Optional, Sequence |
| 7 | + |
| 8 | +from eis_toolkit.exceptions import InvalidColumnException, NumericValueSignException |
| 9 | +from eis_toolkit.utilities.aitchison_geometry import _closure |
| 10 | +from eis_toolkit.utilities.checks.compositional import check_in_simplex_sample_space |
| 11 | +from eis_toolkit.utilities.miscellaneous import rename_columns_by_pattern |
| 12 | + |
| 13 | + |
@beartype
def _alr_transform(df: pd.DataFrame, columns: Sequence[str], denominator_column: str) -> pd.DataFrame:
    """Compute log(column / denominator) row by row for every column listed in `columns`."""
    divisor = df[denominator_column]
    # Divide along the row index so that each row uses its own denominator value.
    quotient = df[columns].div(divisor, axis=0)
    return np.log(quotient)
| 19 | + |
| 20 | + |
@beartype
def alr_transform(
    df: pd.DataFrame, column: Optional[str] = None, keep_denominator_column: bool = False
) -> pd.DataFrame:
    """
    Perform an additive logratio (ALR) transformation on the data.

    Every selected column is divided by the denominator column, after which the natural logarithm is taken.

    Args:
        df: A dataframe of compositional data.
        column: The name of the column to be used as the denominator column. Defaults to the last
            column of the dataframe when omitted.
        keep_denominator_column: Whether to include the denominator column in the result. If True, the
            denominator column is transformed too and the returned dataframe retains its original shape.

    Returns:
        A new dataframe containing the ALR transformed data, passed through `rename_columns_by_pattern`.

    Raises:
        InvalidColumnException: The input column isn't found in the dataframe.
        InvalidCompositionException: Data is not normalized to the expected value.
        NumericValueSignException: Data contains zeros or negative values.
    """
    check_in_simplex_sample_space(df)

    if column is None:
        # No explicit choice: use the last column as the denominator.
        denominator = df.columns[-1]
    elif column in df.columns:
        denominator = column
    else:
        raise InvalidColumnException(f"The column {column} was not found in the dataframe.")

    numerator_columns = list(df.columns)
    if not keep_denominator_column and denominator in numerator_columns:
        numerator_columns.remove(denominator)

    return rename_columns_by_pattern(_alr_transform(df, numerator_columns, denominator))
| 55 | + |
| 56 | + |
@beartype
def _inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """Exponentiate the ALR data (on a copy, leaving `df` untouched) and pass it to `_closure` with `scale`."""
    alr_data = df.copy()

    denominator_present = denominator_column in alr_data.columns.values
    if not denominator_present:
        # log(x / x) = 0: the denominator's own ALR coordinate is zero, so restore it as such.
        alr_data[denominator_column] = 0.0

    return _closure(np.exp(alr_data), scale)
| 66 | + |
| 67 | + |
@beartype
def inverse_alr(df: pd.DataFrame, denominator_column: str, scale: Number = 1.0) -> pd.DataFrame:
    """
    Perform the inverse of the ALR transformation.

    Args:
        df: A dataframe of ALR transformed compositional data.
        denominator_column: The name of the denominator column. If the column is absent from `df`,
            it is added before inverting.
        scale: The value to which each composition should be normalized. E.g., if the composition is
            expressed as percentages, scale=100.

    Returns:
        A dataframe containing the inverse transformed data.

    Raises:
        NumericValueSignException: The input scale value is zero or less.
    """
    if scale <= 0:
        raise NumericValueSignException("The scale value should be positive.")

    inverted = _inverse_alr(df, denominator_column, scale)
    return inverted
0 commit comments