Skip to content

Commit 35d3d34

Browse files
authored
Merge pull request #59 from omics-datascience/feature/clustering_algorithm_support
Feature/clustering algorithm support
2 parents 0064cf9 + 8c18789 commit 35d3d34

File tree

14 files changed

+229
-26
lines changed

14 files changed

+229
-26
lines changed

src/feature_selection/fs_algorithms_spark.py

+2
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) ->
3434
return 'spectral'
3535
if cluster_algorithm == ClusteringAlgorithm.BK_MEANS:
3636
return 'bk_means'
37+
if cluster_algorithm == ClusteringAlgorithm.WARD:
38+
return 'ward'
3739
return 'k_means' # Default is kmeans
3840

3941

src/feature_selection/fs_models.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from typing import Literal, Union, Optional
22
from django.conf import settings
3-
from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans
3+
from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering
44
from sksurv.ensemble import RandomSurvivalForest
55
from sksurv.svm import FastKernelSurvivalSVM
66
from .models import ClusteringAlgorithm
@@ -12,7 +12,7 @@
1212
SVMOptimizerOptions = Literal["avltree", "rbtree"]
1313

1414
# Available models for clustering
15-
ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans]
15+
ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering]
1616

1717

1818
def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
@@ -28,6 +28,8 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
2828
return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto')
2929
elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL:
3030
return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state)
31+
elif clustering_algorithm == ClusteringAlgorithm.WARD:
32+
return AgglomerativeClustering(n_clusters=number_of_clusters, linkage='ward')
3133
elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS:
3234
return BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state)
3335

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
# Generated by Django 4.2.15 on 2025-01-02 20:33
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
('feature_selection', '0056_alter_clusteringparameters_algorithm_and_more'),
10+
]
11+
12+
operations = [
13+
migrations.AlterField(
14+
model_name='clusteringparameters',
15+
name='algorithm',
16+
field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')], default=1),
17+
),
18+
migrations.AlterField(
19+
model_name='clusteringtimesrecord',
20+
name='algorithm',
21+
field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')]),
22+
),
23+
]

src/feature_selection/models.py

+1
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ class ClusteringAlgorithm(models.IntegerChoices):
3434
K_MEANS = 1
3535
SPECTRAL = 2 # TODO: implement in backend
3636
BK_MEANS = 3
37+
WARD = 4
3738

3839

3940
class ClusteringMetric(models.IntegerChoices):

src/feature_selection/views.py

+2
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,8 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering
214214
algorithm = ClusteringAlgorithm.K_MEANS
215215
elif algorithm_description == 'spectral':
216216
algorithm = ClusteringAlgorithm.SPECTRAL
217+
elif algorithm_description == 'ward':
218+
algorithm = ClusteringAlgorithm.WARD
217219
else:
218220
algorithm = ClusteringAlgorithm.BK_MEANS
219221
scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \

src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewClusteringModelForm.tsx

+72-7
Original file line number | Diff line number | Diff line change
@@ -15,21 +15,36 @@ interface NewClusteringModelFormProps {
1515
}
1616

1717
export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
18-
// TODO: add an InfoPopup for all the inputs
1918
return (
2019
<>
2120
<Form.Select
2221
fluid
2322
selectOnBlur={false}
24-
label='Algorithm'
23+
label={
24+
<InputLabel label='Algorithm'>
25+
<InfoPopup
26+
content={
27+
<>
28+
<p>K-Means: Groups data by minimizing intra-cluster variance; effective for clustering RNA and miRNA expression profiles.</p>
29+
<p>Spectral Clustering: Uses graph-based similarity to identify complex patterns; ideal for integrating methylation and CNA data.</p>
30+
<p>BK-Means: A hierarchical variation of K-Means, suitable for layered clustering of clinical and multi-omics datasets.</p>
31+
<p>Ward’s Method: Minimizes variance in hierarchical clustering; well-suited for combining RNA and methylation data in integrated analyses.</p>
32+
</>
33+
}
34+
onTop={false}
35+
onEvent='hover'
36+
noBorder
37+
extraClassName='pull-right'
38+
/>
39+
</InputLabel>
40+
}
2541
options={clusteringAlgorithmOptions}
2642
placeholder='Select an algorithm'
2743
name='algorithm'
2844
value={props.parameters.algorithm}
2945
onChange={props.handleChangeParams}
3046
/>
3147

32-
{/* TODO: add InfoPopup */}
3348
<Form.Checkbox
3449
checked={props.parameters.lookForOptimalNClusters}
3550
onChange={(_e, { checked }) => { props.handleChangeOptimalNClusters(checked ?? false) }}
@@ -39,7 +54,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
3954
{!props.parameters.lookForOptimalNClusters &&
4055
<Form.Input
4156
type='number'
42-
label='Number of clusters'
57+
label={
58+
<InputLabel label='Number of clusters'>
59+
<InfoPopup
60+
content='The number of clusters to group the data into. The optimal number of clusters can be found by looking for the elbow in the curve of the sum of squared distances between samples and their closest cluster center.'
61+
onTop={false}
62+
onEvent='hover'
63+
noBorder
64+
extraClassName='pull-right'
65+
/>
66+
</InputLabel>
67+
}
4368
name='nClusters'
4469
min={2}
4570
max={10}
@@ -51,7 +76,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
5176
<Form.Select
5277
fluid
5378
selectOnBlur={false}
54-
label='Metric'
79+
label={
80+
<InputLabel label='Metric'>
81+
<InfoPopup
82+
content={
83+
<>
84+
<p>Cox Regression: A proportional hazards model to identify associations between multi-omics features (RNA, miRNA, methylation) and clinical outcomes over time.</p>
85+
<p>Log-Rank Test: A non-parametric test to compare the survival distributions of two or more groups; currently not available.</p>
86+
</>
87+
}
88+
onTop={false}
89+
onEvent='hover'
90+
noBorder
91+
extraClassName='pull-right'
92+
/>
93+
</InputLabel>
94+
}
5595
options={clusteringMetricOptions}
5696
placeholder='Select a metric'
5797
name='metric'
@@ -64,7 +104,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
64104
<Form.Select
65105
fluid
66106
selectOnBlur={false}
67-
label='Scoring method'
107+
label={
108+
<InputLabel label='Scoring method'>
109+
<InfoPopup
110+
content={
111+
<>
112+
<p>C-Index: A measure of concordance between predicted and observed survival outcomes; higher values indicate better model performance.</p>
113+
<p>Log Likelihood: The logarithm of the probability of observing the data given the model; higher values indicate better model performance.</p>
114+
</>
115+
}
116+
onTop={false}
117+
onEvent='hover'
118+
noBorder
119+
extraClassName='pull-right'
120+
/>
121+
</InputLabel>
122+
}
68123
options={clusteringScoringMethodOptions}
69124
placeholder='Select a method'
70125
name='scoringMethod'
@@ -75,7 +130,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
75130

76131
<Form.Group widths='equal'>
77132
<Form.Input
78-
label='Random state'
133+
label={
134+
<InputLabel label='Random state'>
135+
<InfoPopup
136+
content='The seed used by the random number generator to ensure reproducibility of the results.'
137+
onTop={false}
138+
onEvent='hover'
139+
noBorder
140+
extraClassName='pull-right'
141+
/>
142+
</InputLabel>
143+
}
79144
placeholder='An integer number'
80145
type='number'
81146
step={1}

src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewRFModelForm.tsx

+47-5
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import React from 'react'
22
import { Form, InputOnChangeData } from 'semantic-ui-react'
33
import { RFParameters } from '../../types'
4+
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
5+
import { InputLabel } from '../../../common/InputLabel'
46

57
interface NewSVMModelFormProps {
68
/** Getter of the selected params to handle in the form. */
@@ -16,18 +18,38 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
1618
const lookForOptimalNEstimators = props.parameters.lookForOptimalNEstimators
1719
return (
1820
<>
19-
{/* TODO: add InfoPopup */}
21+
2022
<Form.Checkbox
2123
checked={lookForOptimalNEstimators}
2224
onChange={(_e, { checked }) => { props.handleChangeOptimalNEstimators(checked ?? false) }}
23-
label='Search for the optimal number of trees'
25+
label={
26+
<InputLabel label='Search for the optimal number of trees'>
27+
<InfoPopup
28+
content='This option is useful when the number of samples in the clinical data is small or there are few observed events, setting this value increases the robustness of the model in such cases avoiding problems with NaN values'
29+
onTop={false}
30+
onEvent='hover'
31+
noBorder
32+
extraClassName='pull-right'
33+
/>
34+
</InputLabel>
35+
}
2436
/>
2537

2638
<Form.Group widths='equal'>
2739
{!lookForOptimalNEstimators &&
2840
<Form.Input
2941
fluid
30-
label='Number of trees'
42+
label={
43+
<InputLabel label='Number of trees'>
44+
<InfoPopup
45+
content='The number of trees in the forest'
46+
onTop={false}
47+
onEvent='hover'
48+
noBorder
49+
extraClassName='pull-right'
50+
/>
51+
</InputLabel>
52+
}
3153
type='number'
3254
min={10}
3355
max={20}
@@ -40,7 +62,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
4062

4163
<Form.Input
4264
fluid
43-
label='Max depth'
65+
label={
66+
<InputLabel label='Max depth'>
67+
<InfoPopup
68+
content='The maximum depth of the tree'
69+
onTop={false}
70+
onEvent='hover'
71+
noBorder
72+
extraClassName='pull-right'
73+
/>
74+
</InputLabel>
75+
}
4476
placeholder='An integer number'
4577
type='number'
4678
min={3}
@@ -52,7 +84,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
5284

5385
<Form.Input
5486
fluid
55-
label='Random state'
87+
label={
88+
<InputLabel label='Random state'>
89+
<InfoPopup
90+
content='Seed used by the random number generator'
91+
onTop={false}
92+
onEvent='hover'
93+
noBorder
94+
extraClassName='pull-right'
95+
/>
96+
</InputLabel>
97+
}
5698
placeholder='An integer number'
5799
type='number'
58100
step={1}

src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewSVMModelForm.tsx

+41-4
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@ import React from 'react'
22
import { Form, InputOnChangeData } from 'semantic-ui-react'
33
import { SVMKernelOptions } from '../../utils'
44
import { SVMParameters } from '../../types'
5+
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
6+
import { InputLabel } from '../../../common/InputLabel'
57

68
interface NewSVMModelFormProps {
79
/** Getter of the selected params to handle in the form. */
@@ -11,13 +13,28 @@ interface NewSVMModelFormProps {
1113
}
1214

1315
export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
14-
// TODO: add an InfoPopup for all the inputs
1516
return (
1617
<>
1718
<Form.Select
1819
fluid
1920
selectOnBlur={false}
20-
label='Kernel'
21+
label={
22+
<InputLabel label='Kernel'>
23+
<InfoPopup
24+
content={
25+
<>
26+
<p>Linear Kernel: Best for linearly separable data; commonly used for simple genomic or clinical feature classification.</p>
27+
<p>Polynomial Kernel: Captures non-linear patterns; effective for complex relationships in multi-omics data.</p>
28+
<p>RBF Kernel: Maps data to a higher-dimensional space; ideal for handling non-linear separations in RNA and methylation analyses.</p>
29+
</>
30+
}
31+
onTop={false}
32+
onEvent='hover'
33+
noBorder
34+
extraClassName='pull-right'
35+
/>
36+
</InputLabel>
37+
}
2138
options={SVMKernelOptions}
2239
placeholder='Select a kernel'
2340
name='kernel'
@@ -28,7 +45,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
2845
<Form.Group widths='equal'>
2946
<Form.Input
3047
fluid
31-
label='Max iterations'
48+
label={
49+
<InputLabel label='Max iterations'>
50+
<InfoPopup
51+
content='The maximum number of iterations to be run'
52+
onTop={false}
53+
onEvent='hover'
54+
noBorder
55+
extraClassName='pull-right'
56+
/>
57+
</InputLabel>
58+
}
3259
placeholder='100-2000'
3360
name='maxIterations'
3461
value={props.parameters.maxIterations ?? ''}
@@ -37,7 +64,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
3764

3865
<Form.Input
3966
fluid
40-
label='Random state'
67+
label={
68+
<InputLabel label='Random state'>
69+
<InfoPopup
70+
content='Seed used by the random number generator'
71+
onTop={false}
72+
onEvent='hover'
73+
noBorder
74+
extraClassName='pull-right'
75+
/>
76+
</InputLabel>
77+
}
4178
placeholder='An integer number'
4279
type='number'
4380
step={1}

src/frontend/static/frontend/src/components/biomarkers/biomarker-details-modal/trained-models/NewTrainedModelModal.tsx

+13-1
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ import { DjangoCGDSStudy, DjangoUserFile } from '../../../../utils/django_interf
1010
import ky from 'ky'
1111
import { NewClusteringModelForm } from './NewClusteringModelForm'
1212
import { NewRFModelForm } from './NewRFModelForm'
13+
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
14+
import { InputLabel } from '../../../common/InputLabel'
1315

1416
declare const urlNewTrainedModel: string
1517

@@ -437,7 +439,17 @@ export const NewTrainedModelModal = (props: NewTrainedModelModalProps) => {
437439

438440
<Form.Input
439441
fluid
440-
label='Number of folds'
442+
label={
443+
<InputLabel label='Number of folds'>
444+
<InfoPopup
445+
content='Defines the number of data splits for cross-validation; ensures robust model evaluation and prevents overfitting.'
446+
onTop={false}
447+
onEvent='hover'
448+
noBorder
449+
extraClassName='pull-right'
450+
/>
451+
</InputLabel>
452+
}
441453
placeholder='An integer number'
442454
type='number'
443455
step={1}

0 commit comments

Comments
 (0)