diff --git a/src/feature_selection/fs_algorithms_spark.py b/src/feature_selection/fs_algorithms_spark.py
index fcec625d..6cff6ab2 100644
--- a/src/feature_selection/fs_algorithms_spark.py
+++ b/src/feature_selection/fs_algorithms_spark.py
@@ -34,6 +34,8 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) ->
return 'spectral'
if cluster_algorithm == ClusteringAlgorithm.BK_MEANS:
return 'bk_means'
+ if cluster_algorithm == ClusteringAlgorithm.WARD:
+ return 'ward'
return 'k_means' # Default is kmeans
diff --git a/src/feature_selection/fs_models.py b/src/feature_selection/fs_models.py
index a8de8e1d..6ef092a5 100644
--- a/src/feature_selection/fs_models.py
+++ b/src/feature_selection/fs_models.py
@@ -1,6 +1,6 @@
from typing import Literal, Union, Optional
from django.conf import settings
-from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans
+from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering
from sksurv.ensemble import RandomSurvivalForest
from sksurv.svm import FastKernelSurvivalSVM
from .models import ClusteringAlgorithm
@@ -12,7 +12,7 @@
SVMOptimizerOptions = Literal["avltree", "rbtree"]
# Available models for clustering
-ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans]
+ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering]
def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
@@ -28,6 +28,8 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto')
elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL:
return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state)
+ elif clustering_algorithm == ClusteringAlgorithm.WARD:
+ return AgglomerativeClustering(n_clusters=number_of_clusters, linkage='ward')
elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS:
return BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state)
diff --git a/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py b/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py
new file mode 100644
index 00000000..514406b9
--- /dev/null
+++ b/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py
@@ -0,0 +1,23 @@
+# Generated by Django 4.2.15 on 2025-01-02 20:33
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('feature_selection', '0056_alter_clusteringparameters_algorithm_and_more'),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name='clusteringparameters',
+ name='algorithm',
+ field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')], default=1),
+ ),
+ migrations.AlterField(
+ model_name='clusteringtimesrecord',
+ name='algorithm',
+ field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')]),
+ ),
+ ]
diff --git a/src/feature_selection/models.py b/src/feature_selection/models.py
index 0ad5b360..3435913c 100644
--- a/src/feature_selection/models.py
+++ b/src/feature_selection/models.py
@@ -34,6 +34,7 @@ class ClusteringAlgorithm(models.IntegerChoices):
K_MEANS = 1
SPECTRAL = 2 # TODO: implement in backend
BK_MEANS = 3
+ WARD = 4
class ClusteringMetric(models.IntegerChoices):
diff --git a/src/feature_selection/views.py b/src/feature_selection/views.py
index 6e93a058..aaa9e9bf 100644
--- a/src/feature_selection/views.py
+++ b/src/feature_selection/views.py
@@ -214,6 +214,8 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering
algorithm = ClusteringAlgorithm.K_MEANS
elif algorithm_description == 'spectral':
algorithm = ClusteringAlgorithm.SPECTRAL
+ elif algorithm_description == 'ward':
+ algorithm = ClusteringAlgorithm.WARD
else:
algorithm = ClusteringAlgorithm.BK_MEANS
scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \
diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx
index 54faf816..d4cd21a2 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx
+++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx
@@ -15,13 +15,29 @@ interface NewClusteringModelFormProps {
}
export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
- // TODO: add an InfoPopup for all the inputs
return (
<>
+
+ K-Means: Groups data by minimizing intra-cluster variance; effective for clustering RNA and miRNA expression profiles.
+ Spectral Clustering: Uses graph-based similarity to identify complex patterns; ideal for integrating methylation and CNA data.
+ BK-Means: A hierarchical variation of K-Means, suitable for layered clustering of clinical and multi-omics datasets.
+ Ward’s Method: Minimizes variance in hierarchical clustering; well-suited for combining RNA and methylation data in integrated analyses.
+ >
+ }
+ onTop={false}
+ onEvent='hover'
+ noBorder
+ extraClassName='pull-right'
+ />
+
+ }
options={clusteringAlgorithmOptions}
placeholder='Select an algorithm'
name='algorithm'
@@ -29,7 +45,6 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
onChange={props.handleChangeParams}
/>
- {/* TODO: add InfoPopup */}
{ props.handleChangeOptimalNClusters(checked ?? false) }}
@@ -39,7 +54,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
{!props.parameters.lookForOptimalNClusters &&
+
+
+ }
name='nClusters'
min={2}
max={10}
@@ -51,7 +76,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
+
+ Cox Regression: A proportional hazards model to identify associations between multi-omics features (RNA, miRNA, methylation) and clinical outcomes over time.
+ Log-Rank Test: A non-parametric test to compare the survival distributions of two or more groups; currently not available.
+ >
+ }
+ onTop={false}
+ onEvent='hover'
+ noBorder
+ extraClassName='pull-right'
+ />
+
+ }
options={clusteringMetricOptions}
placeholder='Select a metric'
name='metric'
@@ -64,7 +104,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
+
+ C-Index: A measure of concordance between predicted and observed survival outcomes; higher values indicate better model performance.
+ Log Likelihood: The probability of observing the data given the model; higher values indicate better model performance.
+ >
+ }
+ onTop={false}
+ onEvent='hover'
+ noBorder
+ extraClassName='pull-right'
+ />
+
+ }
options={clusteringScoringMethodOptions}
placeholder='Select a method'
name='scoringMethod'
@@ -75,7 +130,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
+
+
+ }
placeholder='An integer number'
type='number'
step={1}
diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx
index f77a3136..db9b737d 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx
+++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx
@@ -1,6 +1,8 @@
import React from 'react'
import { Form, InputOnChangeData } from 'semantic-ui-react'
import { RFParameters } from '../../types'
+import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
+import { InputLabel } from '../../../common/InputLabel'
interface NewSVMModelFormProps {
/** Getter of the selected params to handle in the form. */
@@ -16,18 +18,38 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
const lookForOptimalNEstimators = props.parameters.lookForOptimalNEstimators
return (
<>
- {/* TODO: add InfoPopup */}
+
{ props.handleChangeOptimalNEstimators(checked ?? false) }}
- label='Search for the optimal number of trees'
+ label={
+
+
+
+ }
/>
{!lookForOptimalNEstimators &&
+
+
+ }
type='number'
min={10}
max={20}
@@ -40,7 +62,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
+
+
+ }
placeholder='An integer number'
type='number'
min={3}
@@ -52,7 +84,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
+
+
+ }
placeholder='An integer number'
type='number'
step={1}
diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx
index 2451fce9..ed0e07b2 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx
+++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx
@@ -2,6 +2,8 @@ import React from 'react'
import { Form, InputOnChangeData } from 'semantic-ui-react'
import { SVMKernelOptions } from '../../utils'
import { SVMParameters } from '../../types'
+import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
+import { InputLabel } from '../../../common/InputLabel'
interface NewSVMModelFormProps {
/** Getter of the selected params to handle in the form. */
@@ -11,13 +13,28 @@ interface NewSVMModelFormProps {
}
export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
- // TODO: add an InfoPopup for all the inputs
return (
<>
+
+ Linear Kernel: Best for linearly separable data; commonly used for simple genomic or clinical feature classification.
+ Polynomial Kernel: Captures non-linear patterns; effective for complex relationships in multi-omics data.
+ RBF Kernel: Maps data to a higher-dimensional space; ideal for handling non-linear separations in RNA and methylation analyses.
+ >
+ }
+ onTop={false}
+ onEvent='hover'
+ noBorder
+ extraClassName='pull-right'
+ />
+
+ }
options={SVMKernelOptions}
placeholder='Select a kernel'
name='kernel'
@@ -28,7 +45,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
+
+
+ }
placeholder='100-2000'
name='maxIterations'
value={props.parameters.maxIterations ?? ''}
@@ -37,7 +64,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
+
+
+ }
placeholder='An integer number'
type='number'
step={1}
diff --git a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx
index e4d4fb5d..3c4e01de 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx
+++ b/src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx
@@ -10,6 +10,8 @@ import { DjangoCGDSStudy, DjangoUserFile } from '../../../../utils/django_interf
import ky from 'ky'
import { NewClusteringModelForm } from './NewClusteringModelForm'
import { NewRFModelForm } from './NewRFModelForm'
+import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
+import { InputLabel } from '../../../common/InputLabel'
declare const urlNewTrainedModel: string
@@ -437,7 +439,17 @@ export const NewTrainedModelModal = (props: NewTrainedModelModalProps) => {
+
+
+ }
placeholder='An integer number'
type='number'
step={1}
diff --git a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx
index 5535b83a..1e7d3d50 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx
+++ b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx
@@ -32,6 +32,10 @@ export const ClusteringAlgorithmLabel = (props: ClusteringAlgorithmLabelProps) =
color = 'blue'
description = 'Bisecting KMeans'
break
+ case ClusteringAlgorithm.WARD:
+ color = 'blue'
+ description = 'Ward'
+ break
default:
color = 'blue'
description = ''
diff --git a/src/frontend/static/frontend/src/components/biomarkers/types.ts b/src/frontend/static/frontend/src/components/biomarkers/types.ts
index 20b78bb5..9411a955 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/types.ts
+++ b/src/frontend/static/frontend/src/components/biomarkers/types.ts
@@ -209,7 +209,8 @@ enum FitnessFunction {
enum ClusteringAlgorithm {
K_MEANS = 1,
SPECTRAL = 2,
- BK_MEANS = 3
+ BK_MEANS = 3,
+ WARD = 4
}
/** Clustering metric to optimize. */
diff --git a/src/frontend/static/frontend/src/components/biomarkers/utils.ts b/src/frontend/static/frontend/src/components/biomarkers/utils.ts
index a51237af..cbc13eca 100644
--- a/src/frontend/static/frontend/src/components/biomarkers/utils.ts
+++ b/src/frontend/static/frontend/src/components/biomarkers/utils.ts
@@ -37,13 +37,14 @@ const SVMKernelOptions: DropdownItemProps[] = [
const clusteringAlgorithmOptions: DropdownItemProps[] = [
{ key: ClusteringAlgorithm.K_MEANS, text: 'K-Means', value: ClusteringAlgorithm.K_MEANS },
{ key: ClusteringAlgorithm.SPECTRAL, text: 'Spectral', value: ClusteringAlgorithm.SPECTRAL },
- { key: ClusteringAlgorithm.BK_MEANS, text: 'BK-Means', value: ClusteringAlgorithm.BK_MEANS }
+ { key: ClusteringAlgorithm.BK_MEANS, text: 'BK-Means', value: ClusteringAlgorithm.BK_MEANS },
+ { key: ClusteringAlgorithm.WARD, text: 'Ward', value: ClusteringAlgorithm.WARD }
]
/** Available options for a Clustering metric to optimize. */
const clusteringMetricOptions: DropdownItemProps[] = [
{ key: ClusteringMetric.COX_REGRESSION, text: 'Cox-Regression', value: ClusteringMetric.COX_REGRESSION },
- { key: ClusteringMetric.LOG_RANK_TEST, text: 'Log-Rank test', value: ClusteringMetric.LOG_RANK_TEST, disabled: true } // TODO: implement in backend
+ { key: ClusteringMetric.LOG_RANK_TEST, text: 'Log-Rank test', value: ClusteringMetric.LOG_RANK_TEST, disabled: true }
]
/** Available options for a Clustering scoring method for Cox-Regression. */
diff --git a/src/statistical_properties/stats_service.py b/src/statistical_properties/stats_service.py
index d27bc9d1..c82d8f71 100644
--- a/src/statistical_properties/stats_service.py
+++ b/src/statistical_properties/stats_service.py
@@ -3,8 +3,9 @@
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter
-from sklearn.metrics import mean_squared_error, r2_score
+from sklearn.metrics import mean_squared_error, r2_score, silhouette_score
from sklearn.model_selection import GridSearchCV, StratifiedKFold
+from sklearn.cluster import AgglomerativeClustering
from sksurv.metrics import concordance_index_censored
from common.datasets_utils import get_common_samples, generate_molecules_file, format_data, \
generate_clinical_file, generate_molecules_dataframe, check_sample_classes, \
@@ -113,13 +114,19 @@ def __compute_stat_validation(stat_validation: StatisticalValidation, molecules_
# Makes predictions
if is_regression:
check_if_stopped(is_aborted, ExperimentStopped)
- predictions = classifier.predict(molecules_df)
+ if isinstance(classifier, AgglomerativeClustering):
+ predictions = classifier.fit_predict(molecules_df)
+ else:
+ predictions = classifier.predict(molecules_df)
# Gets all the metrics for the SVM or RF
check_if_stopped(is_aborted, ExperimentStopped)
y_true = clinical_data['time']
stat_validation.mean_squared_error = mean_squared_error(y_true, predictions)
- stat_validation.c_index = classifier.score(molecules_df, clinical_data)
+ if isinstance(classifier, AgglomerativeClustering):
+ stat_validation.c_index = silhouette_score(molecules_df, predictions)
+ else:
+ stat_validation.c_index = classifier.score(molecules_df, clinical_data)
stat_validation.r2_score = r2_score(y_true, predictions)
# TODO: add here all the metrics for every Source type
diff --git a/src/statistical_properties/survival_functions.py b/src/statistical_properties/survival_functions.py
index bde6f60d..66e37760 100644
--- a/src/statistical_properties/survival_functions.py
+++ b/src/statistical_properties/survival_functions.py
@@ -5,6 +5,7 @@
from lifelines.statistics import logrank_test
from common.utils import get_subset_of_features
from feature_selection.fs_models import ClusteringModels
+from sklearn.cluster import AgglomerativeClustering
KaplanMeierSample = Tuple[
int,
@@ -138,7 +139,10 @@ def generate_survival_groups_by_clustering(
molecules_df = get_subset_of_features(molecules_df, molecules_df.index)
# Gets the groups
- clustering_result = classifier.predict(molecules_df.values)
+ if isinstance(classifier, AgglomerativeClustering):
+ clustering_result = classifier.fit_predict(molecules_df.values)
+ else:
+ clustering_result = classifier.predict(molecules_df.values)
# Retrieves the data for every group and stores the survival function
data: List[Dict[str, LabelOrKaplanMeierResult]] = []