diff --git a/ITMO_FS/embedded/WeightedEvReg.py b/ITMO_FS/embedded/WeightedEvReg.py
new file mode 100644
index 00000000..0c1897ad
--- /dev/null
+++ b/ITMO_FS/embedded/WeightedEvReg.py
@@ -0,0 +1,213 @@
+import math
+import random
+from collections import defaultdict
+
+import numpy as np
+
+from ..utils import BaseTransformer, apply_cr
+
+
+class WeightedEvReg(BaseTransformer):
+    """
+    Builds a weighted evidential regression model that learns feature weights
+    during fitting. The learnt feature weights can then be used as ranks for
+    feature selection.
+
+    Parameters
+    ----------
+    cutting_rule : tuple or callable
+        Cutting rule applied to the learnt feature weights, e.g. ('K best', 2).
+    alpha : float
+        Learning rate of gradient descent (optional, 0.01 by default).
+    num_epochs : int
+        Number of gradient descent epochs (optional, 1000 by default).
+    p : int
+        Power of the Minkowski distance (optional, 2 by default).
+    k : int
+        Number of neighbours for the kNN-based optimization
+        (optional, 10% of n_samples by default).
+    radius : float
+        Radius of the RBF kernel (optional, 5.0 by default).
+
+    Attributes
+    ----------
+    selected_features_ : list
+        Indices of the selected features after fitting.
+
+    See Also
+    --------
+    https://www.researchgate.net/publication/343493691_Feature_Selection_for_Health_Care_Costs_Prediction_Using_Weighted_Evidential_Regression
+
+    Notes
+    -----
+    The main idea is to use weighted EVREG to predict labels and then optimize
+    the weights with respect to the loss via gradient descent for a fixed
+    number of epochs. The weights scale each feature's contribution to the
+    distance between objects; by optimizing this contribution, the algorithm
+    improves prediction quality, revealing the bond between each feature and
+    the prediction and thereby performing feature selection.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from ITMO_FS.embedded import WeightedEvReg
+    >>> X = np.array([[1, 2, 3, 3, 1], [2, 2, 3, 3, 2], [1, 3, 3, 1, 3],
+    ...               [3, 1, 3, 1, 4], [4, 4, 3, 1, 5]])
+    >>> y = np.array([1, 2, 3, 4, 5])
+    >>> weighted_ev_reg = WeightedEvReg(cutting_rule=('K best', 2), num_epochs=100)
+    >>> weighted_ev_reg.fit(X, y)
+    >>> print(weighted_ev_reg.selected_features_)
+    """
+
+    def __init__(self, cutting_rule, alpha=0.01, num_epochs=1000, p=2, k=None, radius=5.0):
+        self.alpha = alpha
+        self.num_epochs = num_epochs
+        self.p = p
+        self.k = k
+        self.radius = radius
+        self.cutting_rule = cutting_rule
+        # Per-instance RNG instead of reseeding the global random module.
+        self._rng = random.Random(42)
+
+    @staticmethod
+    def __weighted_minkowski_distance(first, second, weights, p):
+        return np.sum(np.abs((first - second) * weights) ** p) ** (1.0 / p)
+
+    @staticmethod
+    def __rbf(distance, radius):
+        return math.exp(-(distance ** 2) / radius)
+
+    def __rbf_vectors(self, first, second, weights, p, radius):
+        return self.__rbf(self.__weighted_minkowski_distance(first, second, weights, p), radius)
+
+    def __count_K(self, X, index, nearest_neighbors, weights, p, radius):
+        all_distances = [self.__rbf(self.__weighted_minkowski_distance(X[index], X[t], weights, p), radius)
+                         for t in nearest_neighbors]
+        distances_minus = np.prod([1 - dist for dist in all_distances])
+        distances_without = [dist * distances_minus / (1 - dist) for dist in all_distances]
+        return distances_minus + sum(distances_without), distances_without, distances_minus
+
+    @staticmethod
+    def __elements_number(k_smallest):
+        return sum(map(len, k_smallest.values()))
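+    # The prediction below follows the evidential k-NN regression scheme from
+    # the referenced paper: each neighbour contributes a basic mass
+    # proportional to an RBF of its weighted distance, and the remaining
+    # (ignorance) mass is assigned to the midrange of the neighbours' labels.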
+    def __evreg_predict(self, X, y, index, cur_weights, p, k, radius):
+        to_predict = X[index]
+        # Indices of the current nearest neighbours, grouped by distance.
+        k_smallest = defaultdict(list)
+        for i in range(X.shape[0]):
+            if i == index:
+                continue
+            cur_distance = self.__weighted_minkowski_distance(to_predict, X[i], cur_weights, p)
+            if self.__elements_number(k_smallest) == k:
+                max_smallest = max(k_smallest.keys())
+                if cur_distance < max_smallest:
+                    # Evict a random element of the farthest group to make room.
+                    del k_smallest[max_smallest][self._rng.randint(0, len(k_smallest[max_smallest]) - 1)]
+                    if not k_smallest[max_smallest]:
+                        del k_smallest[max_smallest]
+                    k_smallest[cur_distance].append(i)
+            else:
+                k_smallest[cur_distance].append(i)
+        nearest_neighbors = [item for sublist in k_smallest.values() for item in sublist]
+        K, distances_without, ignorance = self.__count_K(X, index, nearest_neighbors, cur_weights, p, radius)
+        # Normalised evidential masses of the neighbours plus the ignorance mass.
+        m = 1.0 / K * np.array(distances_without)
+        m_star = ignorance / K
+        return sum(m[i] * y[nearest_neighbors[i]] for i in range(len(nearest_neighbors))) + \
+            m_star * (max(y[nearest_neighbors]) + min(y[nearest_neighbors])) / 2
+
+    @staticmethod
+    def __count_loss(expected_y, predicted_y):
+        return np.mean((expected_y - np.asarray(predicted_y)) ** 2)
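+    # --- Gradient helpers ---
+    # The methods below differentiate the predicted label with respect to the
+    # feature weights by the chain rule: weighted Minkowski distance -> RBF
+    # kernel -> evidential masses -> prediction.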
+    @staticmethod
+    def __minkowski_derivative(first, second, weights, p):
+        # Partial derivative of D(w) = (sum_i |(first_i - second_i) * w_i|^p)^(1/p)
+        # with respect to each weight w_j:
+        # D^(1-p) * |first_j - second_j|^p * |w_j|^(p-1) * sign(w_j).
+        diff = np.abs(first - second)
+        dist_pow = np.sum((diff * np.abs(weights)) ** p)
+        return dist_pow ** (1.0 / p - 1) * diff ** p * np.sign(weights) * np.abs(weights) ** (p - 1)
+
+    def __rbf_derivative(self, first, second, weights, p, radius):
+        # Chain rule through the RBF kernel exp(-D^2 / radius).
+        distance = self.__weighted_minkowski_distance(first, second, weights, p)
+        return -2.0 / radius * self.__rbf(distance, radius) * distance * \
+            self.__minkowski_derivative(first, second, weights, p)
+
+    def __prod_seq_func(self, X, index, skip, weights, p, radius, also_skip=None):
+        return np.prod([1 - self.__rbf_vectors(X[index], X[i], weights, p, radius)
+                        for i in range(X.shape[0]) if i not in skip and i != also_skip])
+
+    def __product_sequence_derivative(self, X, index, skip, weights, p, radius):
+        # d/dw prod_i (1 - R_i) = -sum_j R'_j * prod_{i != j} (1 - R_i).
+        return -np.sum(
+            [self.__rbf_derivative(X[index], X[i], weights, p, radius) *
+             self.__prod_seq_func(X, index, skip, weights, p, radius, i)
+             for i in range(X.shape[0]) if i not in skip],
+            axis=0)
+
+    def __K_derivative(self, X, index, weights, p, radius):
+        # K = ignorance + sum_j R_j * prod_{i != j}(1 - R_i); both summands
+        # are differentiated by the product rule.
+        sum_func = lambda skip: self.__rbf_derivative(X[index], X[skip], weights, p, radius) * \
+            self.__prod_seq_func(X, index, [skip, index], weights, p, radius) + \
+            self.__rbf_vectors(X[index], X[skip], weights, p, radius) * \
+            self.__product_sequence_derivative(X, index, [index, skip], weights, p, radius)
+        return self.__product_sequence_derivative(X, index, [index], weights, p, radius) + \
+            np.sum([sum_func(i) for i in range(X.shape[0]) if i != index], axis=0)
+
+    def __count_K_all(self, X, index, weights, p, radius):
+        # Same as __count_K, but over all samples except the queried one.
+        all_distances = [self.__rbf(self.__weighted_minkowski_distance(X[index], X[t], weights, p), radius)
+                         for t in range(X.shape[0]) if t != index]
+        distances_minus = np.prod([1 - dist for dist in all_distances])
+        distances_without = [dist * distances_minus / (1 - dist) for dist in all_distances]
+        return distances_minus + sum(distances_without), distances_without, distances_minus
+
+    def __single_mass_derivative(self, X, i, j, weights, p, radius):
+        K, _, distances_minus = self.__count_K_all(X, i, weights, p, radius)
+        return (K * self.__rbf_derivative(X[i], X[j], weights, p, radius) -
+                self.__K_derivative(X, i, weights, p, radius) *
+                self.__rbf_vectors(X[i], X[j], weights, p, radius)) * distances_minus / (K ** 2) + \
+            self.__rbf_vectors(X[i], X[j], weights, p, radius) / K * \
+            self.__product_sequence_derivative(X, i, [i], weights, p, radius)
+
+    def __mass_star_derivative(self, X, i, weights, p, radius):
+        K, _, distances_minus = self.__count_K_all(X, i, weights, p, radius)
+        return (K * self.__product_sequence_derivative(X, i, [i], weights, p, radius) -
+                self.__K_derivative(X, i, weights, p, radius) * distances_minus) / (K ** 2)
+
+    def __y_derivative(self, X, i, weights, p, radius, y):
+        y_der = np.zeros(len(weights), dtype=np.float64)
+        y_lab = [y[j] for j in range(len(y)) if j != i]
+        for j in range(X.shape[0]):
+            if j == i:
+                continue
+            y_der += self.__single_mass_derivative(X, i, j, weights, p, radius) * y[j]
+        y_der += self.__mass_star_derivative(X, i, weights, p, radius) * (max(y_lab) + min(y_lab)) / 2
+        return y_der
+
+    def __update_weights(self, X, y, predicted_y, alpha, weights, p, radius):
+        # Gradient descent step on the MSE loss:
+        # dL/dw = 2/n * sum_i (predicted_i - y_i) * d(predicted_i)/dw.
+        gradient = 2.0 / X.shape[0] * np.sum(
+            [(predicted_y[i] - y[i]) * self.__y_derivative(X, i, weights, p, radius, y)
+             for i in range(X.shape[0])],
+            axis=0)
+        return weights - alpha * gradient
+
+    def _fit(self, X, y):
+        """
+        Runs the weighted evidential regression algorithm on the specified dataset.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            The input samples.
+        y : array-like, shape (n_samples,)
+            The target values for the samples.
+
+        Returns
+        -------
+        None
+        """
+        k = self.k
+        if k is None:
+            k = int(0.1 * X.shape[0])
+        if k < 1:
+            # For very small datasets fall back to using all other samples.
+            k = X.shape[0] - 1
+        feature_size = X.shape[1]
+        best_weights = np.ones(feature_size, dtype=np.float64)
+        min_loss = float('inf')
+        cur_weights = best_weights.copy()
+        for _ in range(self.num_epochs):
+            predicted_y = np.array([self.__evreg_predict(X, y, i, cur_weights, self.p, k, self.radius)
+                                    for i in range(X.shape[0])])
+            cur_loss = self.__count_loss(y, predicted_y)
+            # Remember the weights that produced the smallest loss so far.
+            if cur_loss < min_loss:
+                min_loss = cur_loss
+                best_weights = cur_weights.copy()
+            cur_weights = self.__update_weights(X, y, predicted_y, self.alpha, cur_weights, self.p, self.radius)
+        cutting_rule = apply_cr(self.cutting_rule)
+
+        self.selected_features_ = cutting_rule(dict(enumerate(best_weights)))
diff --git a/ITMO_FS/embedded/__init__.py b/ITMO_FS/embedded/__init__.py
index a43c1f99..7731d145 100644
--- a/ITMO_FS/embedded/__init__.py
+++ b/ITMO_FS/embedded/__init__.py
@@ -1 +1,2 @@
 from .MOS import MOS
+from .WeightedEvReg import WeightedEvReg
diff --git a/ITMO_FS/ensembles/model_based/best_sum.py b/ITMO_FS/ensembles/model_based/best_sum.py
index b6f3c5ea..0064489f 100644
--- a/ITMO_FS/ensembles/model_based/best_sum.py
+++ b/ITMO_FS/ensembles/model_based/best_sum.py
@@ -1,8 +1,5 @@
 import numpy as np
-from sklearn.utils import check_array
 from ...utils import BaseTransformer, generate_features, apply_cr
-from ...filters.univariate.measures import GLOB_CR, GLOB_MEASURE
-from sklearn.utils.validation import check_is_fitted


 class BestSum(BaseTransformer):  ## TODO refactor , not stable
diff --git a/ITMO_FS/filters/univariate/UnivariateFilter.py b/ITMO_FS/filters/univariate/UnivariateFilter.py
index a04b8737..560b7351 100644
--- a/ITMO_FS/filters/univariate/UnivariateFilter.py
+++ b/ITMO_FS/filters/univariate/UnivariateFilter.py
@@ -1,6 +1,6 @@
 from numpy import ndarray

-from .measures import GLOB_CR, GLOB_MEASURE
+from .measures import GLOB_MEASURE
 from ...utils import BaseTransformer, generate_features, check_restrictions, apply_cr


@@ -30,7 +30,7 @@ class UnivariateFilter(BaseTransformer):  # TODO ADD LOGGING
     --------
     >>> from sklearn.datasets import make_classification
-    >>> from ITMO_FS.filters.univariate import select_k_best
+    >>> from ITMO_FS.utils import select_k_best
     >>> from ITMO_FS.filters.univariate import UnivariateFilter
     >>> from ITMO_FS.filters.univariate import f_ratio_measure
     >>> x, y = make_classification(1000, 100, n_informative = 10, n_redundant = 30, \
diff --git a/ITMO_FS/filters/univariate/__init__.py b/ITMO_FS/filters/univariate/__init__.py
index 5ab15c60..ab2193f4 100644
--- a/ITMO_FS/filters/univariate/__init__.py
+++ b/ITMO_FS/filters/univariate/__init__.py
@@ -2,8 +2,7 @@
 from .VDM import VDM
 from .measures import anova, fit_criterion_measure, f_ratio_measure, gini_index, su_measure, modified_t_score, fechner_corr, \
     information_gain, reliefF_measure, chi2_measure, spearman_corr, pearson_corr, laplacian_score, qpfs_filter, \
-    kendall_corr, select_k_best, select_k_worst, select_worst_by_value, select_best_by_value, select_best_percentage,\
-    select_worst_percentage
+    kendall_corr
 from .NDFS import NDFS
 from .RFS import RFS
 from .SPEC import SPEC
diff --git a/ITMO_FS/filters/univariate/measures.py b/ITMO_FS/filters/univariate/measures.py
index 2958c0c6..3f740c7b 100644
--- a/ITMO_FS/filters/univariate/measures.py
+++ b/ITMO_FS/filters/univariate/measures.py
@@ -407,6 +407,7 @@ def reliefF_measure(X, y, k_neighbors=1):
     with np.errstate(divide='ignore', invalid="ignore"):  # todo
         return f_ratios / (np.amax(X, axis=0) - np.amin(X, axis=0))

+
 def relief_measure(X, y, m=None):
     """
     Computes Relief measure for each feature.
@@ -947,78 +948,6 @@ def modified_t_score(X, y):
                 "Relief": relief_measure}


-def select_best_by_value(value):
-    return _wrapped_partial(__select_by_value, value=value, more=True)
-
-
-def select_worst_by_value(value):
-    return _wrapped_partial(__select_by_value, value=value, more=False)
-
-
-def __select_by_value(scores, value, more=True):
-    features = []
-    for key, sc_value in scores.items():
-        if more:
-            if sc_value >= value:
-                features.append(key)
-        else:
-            if sc_value <= value:
-                features.append(key)
-    return features
-
-
-def select_k_best(k):
-    return _wrapped_partial(__select_k, k=k, reverse=True)
-
-
-def select_k_worst(k):
-    return _wrapped_partial(__select_k, k=k)
-
-
-def __select_k(scores, k, reverse=False):
-    if type(k) != int:
-        raise TypeError("Number of features should be integer")
-    if k > len(scores):
-        raise ValueError("Cannot select %d features with n_features = %d" % (k, len(scores)))
-    return [keys[0] for keys in sorted(scores.items(), key=lambda kv: kv[1], reverse=reverse)[:k]]
-
-
-def __select_percentage_best(scores, percent):
-    features = []
-    max_val = max(scores.values())
-    threshold = max_val * percent
-    for key, sc_value in scores.items():
-        if sc_value >= threshold:
-            features.append(key)
-    return features
-
-
-def select_best_percentage(percent):
-    return _wrapped_partial(__select_percentage_best, percent=percent)
-
-
-def __select_percentage_worst(scores, percent):
-    features = []
-    max_val = min(scores.values())
-    threshold = max_val * percent
-    for key, sc_value in scores.items():
-        if sc_value >= threshold:
-            features.append(key)
-    return features
-
-
-def select_worst_percentage(percent):
-    return _wrapped_partial(__select_percentage_worst, percent=percent)
-
-
-GLOB_CR = {"Best by value": select_best_by_value,
-           "Worst by value": select_worst_by_value,
-           "K best": select_k_best,
-           "K worst": select_k_worst,
-           "Worst by percentage": select_worst_percentage,
-           "Best by percentage": select_best_percentage}
-
-
 def qpfs_filter(X, y, r=None, sigma=None, solv='quadprog', fn=pearson_corr):
     """
     Performs Quadratic Programming Feature Selection algorithm.
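The cutting rules removed above reappear in ITMO_FS/utils/cutting_rules.py (added below). A minimal doctest-style sketch of the relocated helpers after this refactor; the score dictionary here is illustrative, keyed by feature index:

>>> from ITMO_FS.utils.cutting_rules import select_k_best, select_best_percentage
>>> scores = {0: 0.9, 1: 0.1, 2: 0.5, 3: 0.7}
>>> select_k_best(2)(scores)
[0, 3]
>>> select_best_percentage(0.6)(scores)  # keeps scores >= 0.6 * max(scores)
[0, 3]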
diff --git a/ITMO_FS/utils/__init__.py b/ITMO_FS/utils/__init__.py
index 9fecb701..5aab1945 100644
--- a/ITMO_FS/utils/__init__.py
+++ b/ITMO_FS/utils/__init__.py
@@ -4,3 +4,5 @@
 from .qpfs_body import qpfs_body
 from .base_transformer import BaseTransformer
 from .base_wrapper import BaseWrapper
+from .cutting_rules import select_k_best, select_k_worst, select_worst_by_value, select_best_by_value, select_best_percentage,\
+    select_worst_percentage
\ No newline at end of file
diff --git a/ITMO_FS/utils/cutting_rules.py b/ITMO_FS/utils/cutting_rules.py
new file mode 100644
index 00000000..8e74033d
--- /dev/null
+++ b/ITMO_FS/utils/cutting_rules.py
@@ -0,0 +1,79 @@
+from functools import partial, update_wrapper
+
+
+def _wrapped_partial(func, *args, **kwargs):
+    partial_func = partial(func, *args, **kwargs)
+    update_wrapper(partial_func, func)
+    return partial_func
+
+
+def select_best_by_value(value):
+    return _wrapped_partial(__select_by_value, value=value, more=True)
+
+
+def select_worst_by_value(value):
+    return _wrapped_partial(__select_by_value, value=value, more=False)
+
+
+def __select_by_value(scores, value, more=True):
+    features = []
+    for key, sc_value in scores.items():
+        if more:
+            if sc_value >= value:
+                features.append(key)
+        else:
+            if sc_value <= value:
+                features.append(key)
+    return features
+
+
+def select_k_best(k):
+    return _wrapped_partial(__select_k, k=k, reverse=True)
+
+
+def select_k_worst(k):
+    return _wrapped_partial(__select_k, k=k)
+
+
+def __select_k(scores, k, reverse=False):
+    if not isinstance(k, int):
+        raise TypeError("Number of features should be integer")
+    if k > len(scores):
+        raise ValueError("Cannot select %d features with n_features = %d" % (k, len(scores)))
+    return [keys[0] for keys in sorted(scores.items(), key=lambda kv: kv[1], reverse=reverse)[:k]]
+
+
+def __select_percentage_best(scores, percent):
+    features = []
+    max_val = max(scores.values())
+    threshold = max_val * percent
+    for key, sc_value in scores.items():
+        if sc_value >= threshold:
+            features.append(key)
+    return features
+
+
+def select_best_percentage(percent):
+    return _wrapped_partial(__select_percentage_best, percent=percent)
+
+
+def __select_percentage_worst(scores, percent):
+    features = []
+    min_val = min(scores.values())
+    threshold = min_val * percent
+    for key, sc_value in scores.items():
+        if sc_value >= threshold:
+            features.append(key)
+    return features
+
+
+def select_worst_percentage(percent):
+    return _wrapped_partial(__select_percentage_worst, percent=percent)
+
+
+GLOB_CR = {"Best by value": select_best_by_value,
+           "Worst by value": select_worst_by_value,
+           "K best": select_k_best,
+           "K worst": select_k_worst,
+           "Worst by percentage": select_worst_percentage,
+           "Best by percentage": select_best_percentage}
diff --git a/ITMO_FS/utils/functions.py b/ITMO_FS/utils/functions.py
index 50e9d07d..50e52165 100644
--- a/ITMO_FS/utils/functions.py
+++ b/ITMO_FS/utils/functions.py
@@ -1,6 +1,9 @@
 import numpy as np
 from sklearn.metrics import f1_score

+from ITMO_FS.utils.cutting_rules import GLOB_CR
+
+
 def normalize(x):
     x = np.abs(np.array(x))
     max_ = max(x)
@@ -122,7 +125,6 @@ def power_neg_half(M):
     return np.sqrt(np.linalg.inv(M))

 def apply_cr(cutting_rule):
-    from ..filters.univariate.measures import GLOB_CR, GLOB_MEASURE
     if type(cutting_rule) is tuple:
         cutting_rule_name = cutting_rule[0]
         cutting_rule_value = cutting_rule[1]
diff --git a/ITMO_FS/utils/qpfs_body.py b/ITMO_FS/utils/qpfs_body.py
index 3ba00a71..08d4f54c 100644
--- a/ITMO_FS/utils/qpfs_body.py
+++ b/ITMO_FS/utils/qpfs_body.py
@@ -1,4 +1,3 @@
-import math
 from functools import partial

 import numpy as np
diff --git a/requirements.txt b/requirements.txt
index cbad7c4c..64dd84c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+pandas
 imbalanced-learn
 numpy
 scipy
diff --git a/test/Melif_test.py b/test/Melif_test.py
index 652ad31d..8466d450 100644
--- a/test/Melif_test.py
+++ b/test/Melif_test.py
@@ -11,6 +11,7 @@
 from ITMO_FS.filters import *
 from ITMO_FS.hybrid.Melif import Melif
 from ITMO_FS.utils import test_scorer
+from ITMO_FS.utils.cutting_rules import select_k_best


 class MyTestCase(unittest.TestCase):
diff --git a/test/ensemble_test.py b/test/ensemble_test.py
index 1c29f719..aec549e0 100644
--- a/test/ensemble_test.py
+++ b/test/ensemble_test.py
@@ -1,7 +1,6 @@
 import time
 import unittest
 import numpy as np
-from collections import defaultdict

 from sklearn.datasets import make_classification, make_regression
 from sklearn.metrics import f1_score
@@ -11,6 +10,7 @@
 from ITMO_FS.ensembles.measure_based import *
 from ITMO_FS.ensembles.ranking_based import *
 from ITMO_FS.filters.univariate import *
+from ITMO_FS.utils.cutting_rules import select_k_best


 class MyTestCase(unittest.TestCase):
@@ -119,5 +119,6 @@ def test_benching_ensembles(self):
         print('Ensemble score', np.mean(scores_ens), np.std(scores_ens))
         print()

+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/multivariate_filters_test.py b/test/multivariate_filters_test.py
index 72db637b..47909df1 100644
--- a/test/multivariate_filters_test.py
+++ b/test/multivariate_filters_test.py
@@ -3,7 +3,6 @@
 import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import Pipeline
-import numpy as np

 from sklearn.utils.estimator_checks import check_estimator

@@ -94,7 +93,7 @@ def test_measures(self):
         assert self.data.shape[0] == res.shape[0] and res.shape[1] == 10

     def test_df(self):
-        for f in [FCBFDiscreteFilter(), DISRWithMassive(10), JMIM(10), MultivariateFilter(MIM, 10),\
+        for f in [FCBFDiscreteFilter(), DISRWithMassive(10), JMIM(10), MultivariateFilter(MIM, 10), \
                   TraceRatioFisher(10), STIR(10)]:
             df = f.fit_transform(pd.DataFrame(self.data), pd.DataFrame(self.target))
             arr = f.fit_transform(self.data, self.target)
@@ -120,7 +119,7 @@ def test_pipeline(self):
         # FS - FS - estim
         p = Pipeline([('FS1', TraceRatioFisher(10)),
                       ('FS2', DISRWithMassive(5)),
-                     ('E1', LogisticRegression(max_iter=10000))])
+                      ('E1', LogisticRegression(max_iter=10000))])
         p.fit(self.data, self.target)
         assert 0 <= p.score(self.data, self.target) <= 1
diff --git a/test/univariate_filters_test.py b/test/univariate_filters_test.py
index 7a385774..3f21caa8 100644
--- a/test/univariate_filters_test.py
+++ b/test/univariate_filters_test.py
@@ -10,8 +10,9 @@
 from sklearn.pipeline import Pipeline
 from sklearn.utils.estimator_checks import check_estimator

+from ITMO_FS import WeightedEvReg
 from ITMO_FS.filters.univariate import *
-from ITMO_FS.filters.univariate.measures import GLOB_CR
+from ITMO_FS.utils.cutting_rules import select_k_best, select_best_by_value, GLOB_CR
 from ITMO_FS.utils.information_theory import *

 np.random.seed(42)
@@ -141,7 +142,7 @@ def test_modified_t_score_by_hand_small(self):
         true_modificator = np.array([(sqrt(3) / 2) / ((0 + 5 / (2 * sqrt(13)) + 0) / 3),
                                      (sqrt(3) / (2 * sqrt(7))) / ((0 + 3 / (2 * sqrt(91)) + 4 / sqrt(21)) / 3),
                                      (3 * sqrt(3) / (2 * sqrt(13))) / (
-                                             (5 / (2 * sqrt(13)) + 3 / (2 * sqrt(91)) + sqrt(3) / sqrt(13)) / 3),
+                                         (5 / (2 * sqrt(13)) + 3 / (2 * sqrt(91)) + sqrt(3) / sqrt(13)) / 3),
                                      (1 / 6) / ((0 + 4 / sqrt(21) + sqrt(3) / sqrt(13)) / 3)])

         true_scores = true_numerator / true_denominator * true_modificator
@@ -169,6 +170,15 @@ def test_modified_t_score_univariate_filter_wide(self):
             res = univ_filter.transform(data)
             assert i == res.shape[1]

+    def test_weighted_evreg(self):
+        X = np.array([[5, 1, 3, 2], [4, 2, 2, 1], [3, 3, 4, 1], [2, 2, 3, 1], [1, 1, 5, 2]])
+        y = np.array([1, 1, 2, 2, 2])
+
+        weighted_ev_reg = WeightedEvReg(cutting_rule=('K best', 2), num_epochs=100)
+        weighted_ev_reg.fit(X, y)
+
+        assert len(weighted_ev_reg.selected_features_) == 2
+
     def test_igain(self):  # TODO: wrong values
         iris_dataset = load_iris()
         X = iris_dataset.data
@@ -265,9 +275,11 @@ def test_qpfs_restrictions(self):
         iris_dataset = load_iris()
         X = iris_dataset.data
         y = iris_dataset.target
-        for cutting_rule in [GLOB_CR['Best by value'](0.5), GLOB_CR['Worst by value'](0.5), GLOB_CR['Worst by percentage'](0.5),
-                             GLOB_CR['Best by percentage'](0.5), ('Worst by value', 0.5), ('Best by value', 0.5), ('Worst by percentage', 0.2),
-                             ('Best by percentage', 0.2)]:
+        for cutting_rule in [GLOB_CR['Best by value'](0.5), GLOB_CR['Worst by value'](0.5),
+                             GLOB_CR['Worst by percentage'](0.5),
+                             GLOB_CR['Best by percentage'](0.5), ('Worst by value', 0.5), ('Best by value', 0.5),
+                             ('Worst by percentage', 0.2),
+                             ('Best by percentage', 0.2)]:
             f = UnivariateFilter(qpfs_filter, cutting_rule)
             self.assertRaises(KeyError, f.fit, X, y)
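For reference, a minimal end-to-end sketch of the estimator introduced by this diff; the alpha and num_epochs values are illustrative, not tuned, and the transform call assumes the usual BaseTransformer API:

>>> import numpy as np
>>> from ITMO_FS.embedded import WeightedEvReg
>>> X = np.array([[5, 1, 3, 2], [4, 2, 2, 1], [3, 3, 4, 1],
...               [2, 2, 3, 1], [1, 1, 5, 2]], dtype=np.float64)
>>> y = np.array([1, 1, 2, 2, 2], dtype=np.float64)
>>> model = WeightedEvReg(cutting_rule=('K best', 2), alpha=0.05, num_epochs=50)
>>> model.fit(X, y)
>>> X_selected = model.transform(X)  # keeps the two highest-weighted features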