Skip to content

Feature/clustering algorithm support #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/feature_selection/fs_algorithms_spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) ->
return 'spectral'
if cluster_algorithm == ClusteringAlgorithm.BK_MEANS:
return 'bk_means'
if cluster_algorithm == ClusteringAlgorithm.WARD:
return 'ward'
return 'k_means' # Default is kmeans


Expand Down
6 changes: 4 additions & 2 deletions src/feature_selection/fs_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Literal, Union, Optional
from django.conf import settings
from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans
from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering
from sksurv.ensemble import RandomSurvivalForest
from sksurv.svm import FastKernelSurvivalSVM
from .models import ClusteringAlgorithm
Expand All @@ -12,7 +12,7 @@
SVMOptimizerOptions = Literal["avltree", "rbtree"]

# Available models for clustering
ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans]
ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering]


def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
Expand All @@ -28,6 +28,8 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto')
elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL:
return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state)
elif clustering_algorithm == ClusteringAlgorithm.WARD:
return AgglomerativeClustering(n_clusters=number_of_clusters, linkage='ward')
elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS:
return BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.15 on 2025-01-02 20:33

from django.db import migrations, models


class Migration(migrations.Migration):
    """Re-declare the ``algorithm`` columns so their choices include ``(4, 'Ward')``.

    Alters ``clusteringparameters.algorithm`` (declared with ``default=1``) and
    ``clusteringtimesrecord.algorithm`` (declared without a default). Both are
    ``IntegerField`` columns sharing the same four choices.
    """

    # Both altered fields are declared with exactly the same options, so the
    # list is written once and referenced from each operation below.
    _algorithm_choices = [
        (1, 'K Means'),
        (2, 'Spectral'),
        (3, 'Bk Means'),
        (4, 'Ward'),
    ]

    # Applies on top of migration 0056 of the feature_selection app.
    dependencies = [
        ('feature_selection', '0056_alter_clusteringparameters_algorithm_and_more'),
    ]

    operations = [
        migrations.AlterField(
            model_name='clusteringparameters',
            name='algorithm',
            field=models.IntegerField(choices=_algorithm_choices, default=1),
        ),
        migrations.AlterField(
            model_name='clusteringtimesrecord',
            name='algorithm',
            field=models.IntegerField(choices=_algorithm_choices),
        ),
    ]
1 change: 1 addition & 0 deletions src/feature_selection/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class ClusteringAlgorithm(models.IntegerChoices):
K_MEANS = 1
SPECTRAL = 2 # TODO: implement in backend
BK_MEANS = 3
WARD = 4


class ClusteringMetric(models.IntegerChoices):
Expand Down
2 changes: 2 additions & 0 deletions src/feature_selection/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering
algorithm = ClusteringAlgorithm.K_MEANS
elif algorithm_description == 'spectral':
algorithm = ClusteringAlgorithm.SPECTRAL
elif algorithm_description == 'ward':
algorithm = ClusteringAlgorithm.WARD
else:
algorithm = ClusteringAlgorithm.BK_MEANS
scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,36 @@ interface NewClusteringModelFormProps {
}

export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
// TODO: add an InfoPopup for all the inputs
return (
<>
<Form.Select
fluid
selectOnBlur={false}
label='Algorithm'
label={
<InputLabel label='Algorithm'>
<InfoPopup
content={
<>
<p>K-Means: Groups data by minimizing intra-cluster variance; effective for clustering RNA and miRNA expression profiles.</p>
<p>Spectral Clustering: Uses graph-based similarity to identify complex patterns; ideal for integrating methylation and CNA data.</p>
<p>BK-Means: A hierarchical variation of K-Means, suitable for layered clustering of clinical and multi-omics datasets.</p>
<p>Ward’s Method: Minimizes variance in hierarchical clustering; well-suited for combining RNA and methylation data in integrated analyses.</p>
</>
}
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
options={clusteringAlgorithmOptions}
placeholder='Select an algorithm'
name='algorithm'
value={props.parameters.algorithm}
onChange={props.handleChangeParams}
/>

{/* TODO: add InfoPopup */}
<Form.Checkbox
checked={props.parameters.lookForOptimalNClusters}
onChange={(_e, { checked }) => { props.handleChangeOptimalNClusters(checked ?? false) }}
Expand All @@ -39,7 +54,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
{!props.parameters.lookForOptimalNClusters &&
<Form.Input
type='number'
label='Number of clusters'
label={
<InputLabel label='Number of clusters'>
<InfoPopup
content='The number of clusters to group the data into. The optimal number of clusters can be found by looking for the elbow in the curve of the sum of squared distances between samples and their closest cluster center.'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
name='nClusters'
min={2}
max={10}
Expand All @@ -51,7 +76,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
<Form.Select
fluid
selectOnBlur={false}
label='Metric'
label={
<InputLabel label='Metric'>
<InfoPopup
content={
<>
<p>Cox Regression: A proportional hazards model to identify associations between multi-omics features (RNA, miRNA, methylation) and clinical outcomes over time.</p>
<p>Log-Rank Test: A non-parametric test to compare the survival distributions of two or more groups; currently not available.</p>
</>
}
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
options={clusteringMetricOptions}
placeholder='Select a metric'
name='metric'
Expand All @@ -64,7 +104,22 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {
<Form.Select
fluid
selectOnBlur={false}
label='Scoring method'
label={
<InputLabel label='Scoring method'>
<InfoPopup
content={
<>
<p>C-Index: A measure of concordance between predicted and observed survival outcomes; higher values indicate better model performance.</p>
<p>Log Likelihood: The probability of observing the data given the model; lower values indicate better model performance.</p>
</>
}
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
options={clusteringScoringMethodOptions}
placeholder='Select a method'
name='scoringMethod'
Expand All @@ -75,7 +130,17 @@ export const NewClusteringModelForm = (props: NewClusteringModelFormProps) => {

<Form.Group widths='equal'>
<Form.Input
label='Random state'
label={
<InputLabel label='Random state'>
<InfoPopup
content='The seed used by the random number generator to ensure reproducibility of the results.'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='An integer number'
type='number'
step={1}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import React from 'react'
import { Form, InputOnChangeData } from 'semantic-ui-react'
import { RFParameters } from '../../types'
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
import { InputLabel } from '../../../common/InputLabel'

interface NewSVMModelFormProps {
/** Getter of the selected params to handle in the form. */
Expand All @@ -16,18 +18,38 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {
const lookForOptimalNEstimators = props.parameters.lookForOptimalNEstimators
return (
<>
{/* TODO: add InfoPopup */}

<Form.Checkbox
checked={lookForOptimalNEstimators}
onChange={(_e, { checked }) => { props.handleChangeOptimalNEstimators(checked ?? false) }}
label='Search for the optimal number of trees'
label={
<InputLabel label='Search for the optimal number of trees'>
<InfoPopup
content='This option is useful when the number of samples in the clinical data is small or there are few observed events, setting this value increases the robustness of the model in such cases avoiding problems with NaN values'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
/>

<Form.Group widths='equal'>
{!lookForOptimalNEstimators &&
<Form.Input
fluid
label='Number of trees'
label={
<InputLabel label='Number of trees'>
<InfoPopup
content='The number of trees in the forest'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
type='number'
min={10}
max={20}
Expand All @@ -40,7 +62,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {

<Form.Input
fluid
label='Max depth'
label={
<InputLabel label='Max depth'>
<InfoPopup
content='The maximum depth of the tree'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='An integer number'
type='number'
min={3}
Expand All @@ -52,7 +84,17 @@ export const NewRFModelForm = (props: NewSVMModelFormProps) => {

<Form.Input
fluid
label='Random state'
label={
<InputLabel label='Random state'>
<InfoPopup
content='Seed used by the random number generator'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='An integer number'
type='number'
step={1}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import React from 'react'
import { Form, InputOnChangeData } from 'semantic-ui-react'
import { SVMKernelOptions } from '../../utils'
import { SVMParameters } from '../../types'
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
import { InputLabel } from '../../../common/InputLabel'

interface NewSVMModelFormProps {
/** Getter of the selected params to handle in the form. */
Expand All @@ -11,13 +13,28 @@ interface NewSVMModelFormProps {
}

export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
// TODO: add an InfoPopup for all the inputs
return (
<>
<Form.Select
fluid
selectOnBlur={false}
label='Kernel'
label={
<InputLabel label='Kernel'>
<InfoPopup
content={
<>
<p>Linear Kernel: Best for linearly separable data; commonly used for simple genomic or clinical feature classification.</p>
<p>Polynomial Kernel: Captures non-linear patterns; effective for complex relationships in multi-omics data.</p>
<p>RBF Kernel: Maps data to a higher-dimensional space; ideal for handling non-linear separations in RNA and methylation analyses.</p>
</>
}
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
options={SVMKernelOptions}
placeholder='Select a kernel'
name='kernel'
Expand All @@ -28,7 +45,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {
<Form.Group widths='equal'>
<Form.Input
fluid
label='Max iterations'
label={
<InputLabel label='Max iterations'>
<InfoPopup
content='The maximum number of iterations to be run'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='100-2000'
name='maxIterations'
value={props.parameters.maxIterations ?? ''}
Expand All @@ -37,7 +64,17 @@ export const NewSVMModelForm = (props: NewSVMModelFormProps) => {

<Form.Input
fluid
label='Random state'
label={
<InputLabel label='Random state'>
<InfoPopup
content='Seed used by the random number generator'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='An integer number'
type='number'
step={1}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import { DjangoCGDSStudy, DjangoUserFile } from '../../../../utils/django_interf
import ky from 'ky'
import { NewClusteringModelForm } from './NewClusteringModelForm'
import { NewRFModelForm } from './NewRFModelForm'
import { InfoPopup } from '../../../pipeline/experiment-result/gene-gem-details/InfoPopup'
import { InputLabel } from '../../../common/InputLabel'

declare const urlNewTrainedModel: string

Expand Down Expand Up @@ -437,7 +439,17 @@ export const NewTrainedModelModal = (props: NewTrainedModelModalProps) => {

<Form.Input
fluid
label='Number of folds'
label={
<InputLabel label='Number of folds'>
<InfoPopup
content='Defines the number of data splits for cross-validation; ensures robust model evaluation and prevents overfitting.'
onTop={false}
onEvent='hover'
noBorder
extraClassName='pull-right'
/>
</InputLabel>
}
placeholder='An integer number'
type='number'
step={1}
Expand Down
Loading
Loading