diff --git a/src/feature_selection/fs_algorithms_spark.py b/src/feature_selection/fs_algorithms_spark.py index fcec625d..6cff6ab2 100644 --- a/src/feature_selection/fs_algorithms_spark.py +++ b/src/feature_selection/fs_algorithms_spark.py @@ -34,6 +34,8 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) -> return 'spectral' if cluster_algorithm == ClusteringAlgorithm.BK_MEANS: return 'bk_means' + if cluster_algorithm == ClusteringAlgorithm.WARD: + return 'ward' return 'k_means' # Default is kmeans diff --git a/src/feature_selection/fs_models.py b/src/feature_selection/fs_models.py index a8de8e1d..6ef092a5 100644 --- a/src/feature_selection/fs_models.py +++ b/src/feature_selection/fs_models.py @@ -1,6 +1,6 @@ from typing import Literal, Union, Optional from django.conf import settings -from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans +from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering from sksurv.ensemble import RandomSurvivalForest from sksurv.svm import FastKernelSurvivalSVM from .models import ClusteringAlgorithm @@ -12,7 +12,7 @@ SVMOptimizerOptions = Literal["avltree", "rbtree"] # Available models for clustering -ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans] +ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering] def get_clustering_model(clustering_algorithm: ClusteringAlgorithm, @@ -28,6 +28,8 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm, return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto') elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL: return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state) + elif clustering_algorithm == ClusteringAlgorithm.WARD: + return AgglomerativeClustering(n_clusters=number_of_clusters, linkage='ward') elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS: return 
BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state) diff --git a/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py b/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py new file mode 100644 index 00000000..514406b9 --- /dev/null +++ b/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.15 on 2025-01-02 20:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('feature_selection', '0056_alter_clusteringparameters_algorithm_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='clusteringparameters', + name='algorithm', + field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')], default=1), + ), + migrations.AlterField( + model_name='clusteringtimesrecord', + name='algorithm', + field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')]), + ), + ] diff --git a/src/feature_selection/models.py b/src/feature_selection/models.py index 0ad5b360..3435913c 100644 --- a/src/feature_selection/models.py +++ b/src/feature_selection/models.py @@ -34,6 +34,7 @@ class ClusteringAlgorithm(models.IntegerChoices): K_MEANS = 1 SPECTRAL = 2 # TODO: implement in backend BK_MEANS = 3 + WARD = 4 class ClusteringMetric(models.IntegerChoices): diff --git a/src/feature_selection/views.py b/src/feature_selection/views.py index 6e93a058..aaa9e9bf 100644 --- a/src/feature_selection/views.py +++ b/src/feature_selection/views.py @@ -214,6 +214,8 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering algorithm = ClusteringAlgorithm.K_MEANS elif algorithm_description == 'spectral': algorithm = ClusteringAlgorithm.SPECTRAL + elif algorithm_description == 'ward': + algorithm = ClusteringAlgorithm.WARD else: algorithm = 
ClusteringAlgorithm.BK_MEANS scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \ diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx index 54faf816..d4cd21a2 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx +++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx @@ -15,13 +15,29 @@ interface NewClusteringModelFormProps { } export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { - // TODO: add an InfoPopup for all the inputs return ( <> + +

K-Means: Groups data by minimizing intra-cluster variance; effective for clustering RNA and miRNA expression profiles.

+

Spectral Clustering: Uses graph-based similarity to identify complex patterns; ideal for integrating methylation and CNA data.

+

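BK-Means: Bisecting K-Means, a divisive hierarchical variation of K-Means that repeatedly splits a cluster in two until the requested number of clusters is reached; suitable for layered clustering of clinical and multi-omics datasets.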

+

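Ward’s Method: Agglomerative hierarchical clustering that merges, at each step, the two clusters whose union gives the smallest increase in within-cluster variance; well-suited for combining RNA and methylation data in integrated analyses.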

+ + } + onTop={false} + onEvent='hover' + noBorder + extraClassName='pull-right' + /> + + } options={clusteringAlgorithmOptions} placeholder='Select an algorithm' name='algorithm' @@ -29,7 +45,6 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { onChange={props.handleChangeParams} /> - {/* TODO: add InfoPopup */} { props.handleChangeOptimalNClusters(checked ?? false) }} @@ -39,7 +54,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { {!props.parameters.lookForOptimalNClusters && + + + } name='nClusters' min={2} max={10} @@ -51,7 +76,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { + +

Cox Regression: A proportional hazards model to identify associations between multi-omics features (RNA, miRNA, methylation) and clinical outcomes over time.

+

Log-Rank Test: A non-parametric test to compare the survival distributions of two or more groups; currently not available.

+ + } + onTop={false} + onEvent='hover' + noBorder + extraClassName='pull-right' + /> + + } options={clusteringMetricOptions} placeholder='Select a metric' name='metric' @@ -64,7 +104,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { + +

C-Index: A measure of concordance between predicted and observed survival outcomes; higher values indicate better model performance.

+

Log Likelihood: The logarithm of the probability of observing the data given the model; higher values indicate better model performance.

+ + } + onTop={false} + onEvent='hover' + noBorder + extraClassName='pull-right' + /> + + } options={clusteringScoringMethodOptions} placeholder='Select a method' name='scoringMethod' @@ -75,7 +130,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => { + + + } placeholder='An integer number' type='number' step={1} diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx index f77a3136..db9b737d 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx +++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx @@ -1,6 +1,8 @@ import React from 'react' import { Form, InputOnChangeData } from 'semantic-ui-react' import { RFParameters } from '../../types' +import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup' +import { InputLabel } from '../../../common/InputLabel' interface NewSVMModelFormProps { /** Getter of the selected params to handle in the form. */ @@ -16,18 +18,38 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => { const lookForOptimalNEstimators = props.parameters.lookForOptimalNEstimators return ( <> - {/* TODO: add InfoPopup */} + { props.handleChangeOptimalNEstimators(checked ?? 
false) }} - label='Search for the optimal number of trees' + label={ + + + + } /> {!lookForOptimalNEstimators && + + + } type='number' min={10} max={20} @@ -40,7 +62,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => { + + + } placeholder='An integer number' type='number' min={3} @@ -52,7 +84,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => { + + + } placeholder='An integer number' type='number' step={1} diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx index 2451fce9..ed0e07b2 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx +++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx @@ -2,6 +2,8 @@ import React from 'react' import { Form, InputOnChangeData } from 'semantic-ui-react' import { SVMKernelOptions } from '../../utils' import { SVMParameters } from '../../types' +import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup' +import { InputLabel } from '../../../common/InputLabel' interface NewSVMModelFormProps { /** Getter of the selected params to handle in the form. */ @@ -11,13 +13,28 @@ interface NewSVMModelFormProps { } export const NewSVMModelForm = (props: NewSVMModelFormProps) => { - // TODO: add an InfoPopup for all the inputs return ( <> + +

Linear Kernel: Best for linearly separable data; commonly used for simple genomic or clinical feature classification.

+

Polynomial Kernel: Captures non-linear patterns; effective for complex relationships in multi-omics data.

+

RBF Kernel: Maps data to a higher-dimensional space; ideal for handling non-linear separations in RNA and methylation analyses.

+ + } + onTop={false} + onEvent='hover' + noBorder + extraClassName='pull-right' + /> + + } options={SVMKernelOptions} placeholder='Select a kernel' name='kernel' @@ -28,7 +45,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => { + + + } placeholder='100-2000' name='maxIterations' value={props.parameters.maxIterations ?? ''} @@ -37,7 +64,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => { + + + } placeholder='An integer number' type='number' step={1} diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx index e4d4fb5d..3c4e01de 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx +++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx @@ -10,6 +10,8 @@ import { DjangoCGDSStudy, DjangoUserFile } from '../../../../utils/django_interf import ky from 'ky' import { NewClusteringModelForm } from './NewClusteringModelForm' import { NewRFModelForm } from './NewRFModelForm' +import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup' +import { InputLabel } from '../../../common/InputLabel' declare const urlNewTrainedModel: string @@ -437,7 +439,17 @@ export const NewTrainedModelModal = (props: NewTrainedModelModalProps) => { + + + } placeholder='An integer number' type='number' step={1} diff --git a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx index 5535b83a..1e7d3d50 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx +++ 
b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx @@ -32,6 +32,10 @@ export const ClusteringAlgorithmLabel = (props: ClusteringAlgorithmLabelProps) = color = 'blue' description = 'Bisecting KMeans' break + case ClusteringAlgorithm.WARD: + color = 'blue' + description = 'Ward' + break default: color = 'blue' description = '' diff --git a/src/frontend/static/frontend/src/components/biomarkers/types.ts b/src/frontend/static/frontend/src/components/biomarkers/types.ts index 20b78bb5..9411a955 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/types.ts +++ b/src/frontend/static/frontend/src/components/biomarkers/types.ts @@ -209,7 +209,8 @@ enum FitnessFunction { enum ClusteringAlgorithm { K_MEANS = 1, SPECTRAL = 2, - BK_MEANS = 3 + BK_MEANS = 3, + WARD = 4 } /** Clustering metric to optimize. */ diff --git a/src/frontend/static/frontend/src/components/biomarkers/utils.ts b/src/frontend/static/frontend/src/components/biomarkers/utils.ts index a51237af..cbc13eca 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/utils.ts +++ b/src/frontend/static/frontend/src/components/biomarkers/utils.ts @@ -37,13 +37,14 @@ const SVMKernelOptions: DropdownItemProps[] = [ const clusteringAlgorithmOptions: DropdownItemProps[] = [ { key: ClusteringAlgorithm.K_MEANS, text: 'K-Means', value: ClusteringAlgorithm.K_MEANS }, { key: ClusteringAlgorithm.SPECTRAL, text: 'Spectral', value: ClusteringAlgorithm.SPECTRAL }, - { key: ClusteringAlgorithm.BK_MEANS, text: 'BK-Means', value: ClusteringAlgorithm.BK_MEANS } + { key: ClusteringAlgorithm.BK_MEANS, text: 'BK-Means', value: ClusteringAlgorithm.BK_MEANS }, + { key: ClusteringAlgorithm.WARD, text: 'Ward', value: ClusteringAlgorithm.WARD } ] /** Available options for a Clustering metric to optimize. 
*/ const clusteringMetricOptions: DropdownItemProps[] = [ { key: ClusteringMetric.COX_REGRESSION, text: 'Cox-Regression', value: ClusteringMetric.COX_REGRESSION }, - { key: ClusteringMetric.LOG_RANK_TEST, text: 'Log-Rank test', value: ClusteringMetric.LOG_RANK_TEST, disabled: true } // TODO: implement in backend + { key: ClusteringMetric.LOG_RANK_TEST, text: 'Log-Rank test', value: ClusteringMetric.LOG_RANK_TEST, disabled: true } ] /** Available options for a Clustering scoring method for Cox-Regression. */ diff --git a/src/statistical_properties/stats_service.py b/src/statistical_properties/stats_service.py index d27bc9d1..c82d8f71 100644 --- a/src/statistical_properties/stats_service.py +++ b/src/statistical_properties/stats_service.py @@ -3,8 +3,9 @@ import numpy as np import pandas as pd from lifelines import CoxPHFitter -from sklearn.metrics import mean_squared_error, r2_score +from sklearn.metrics import mean_squared_error, r2_score, silhouette_score from sklearn.model_selection import GridSearchCV, StratifiedKFold +from sklearn.cluster import AgglomerativeClustering from sksurv.metrics import concordance_index_censored from common.datasets_utils import get_common_samples, generate_molecules_file, format_data, \ generate_clinical_file, generate_molecules_dataframe, check_sample_classes, \ @@ -113,13 +114,19 @@ def __compute_stat_validation(stat_validation: StatisticalValidation, molecules_ # Makes predictions if is_regression: check_if_stopped(is_aborted, ExperimentStopped) - predictions = classifier.predict(molecules_df) + if isinstance(classifier, AgglomerativeClustering): + predictions = classifier.fit_predict(molecules_df) + else: + predictions = classifier.predict(molecules_df) # Gets all the metrics for the SVM or RF check_if_stopped(is_aborted, ExperimentStopped) y_true = clinical_data['time'] stat_validation.mean_squared_error = mean_squared_error(y_true, predictions) - stat_validation.c_index = classifier.score(molecules_df, clinical_data) + if 
isinstance(classifier, AgglomerativeClustering): + stat_validation.c_index = silhouette_score(molecules_df, predictions) + else: + stat_validation.c_index = classifier.score(molecules_df, clinical_data) stat_validation.r2_score = r2_score(y_true, predictions) # TODO: add here all the metrics for every Source type diff --git a/src/statistical_properties/survival_functions.py b/src/statistical_properties/survival_functions.py index bde6f60d..66e37760 100644 --- a/src/statistical_properties/survival_functions.py +++ b/src/statistical_properties/survival_functions.py @@ -5,6 +5,7 @@ from lifelines.statistics import logrank_test from common.utils import get_subset_of_features from feature_selection.fs_models import ClusteringModels +from sklearn.cluster import AgglomerativeClustering KaplanMeierSample = Tuple[ int, @@ -138,7 +139,10 @@ def generate_survival_groups_by_clustering( molecules_df = get_subset_of_features(molecules_df, molecules_df.index) # Gets the groups - clustering_result = classifier.predict(molecules_df.values) + if isinstance(classifier, AgglomerativeClustering): + clustering_result = classifier.fit_predict(molecules_df.values) + else: + clustering_result = classifier.predict(molecules_df.values) # Retrieves the data for every group and stores the survival function data: List[Dict[str, LabelOrKaplanMeierResult]] = []