omics-datascience · GonzzaG · Jul 18, 2024 · Oct 4, 2024 · Oct 5, 2024 · Oct 11, 2024
diff --git a/DEPLOYING.md b/DEPLOYING.md
@@ -45,6 +45,7 @@ The following are the steps to perform a deployment in production. In case you w
         - `CGDS_CHUNK_SIZE`: size **in bytes** of the chunk in which the files of a CGDS study are downloaded, the bigger it is, the faster the download is, but the more server memory it consumes. Default `2097152`, i.e. 2MB.
         - `THRESHOLD_ORDINAL`: number of different values for the GEM (CNA) information to be considered ordinal, if the number is <= to this value then it is considered categorical/ordinal and a boxplot is displayed, otherwise, it is considered continuous and the common correlation graph is displayed. Default `5`.
         - `THRESHOLD_GEM_SIZE_TO_COLLECT`: GEM file size threshold (in MB) for the GEM dataset to be available in memory. This has a HUGE impact on the performance of the analysis. If the size is less than or equal to this threshold, it is allocated in memory, otherwise, it will be read lazily from the disk. If None GGCA automatically allocates in memory when the GEM dataset size is small (<= 100MB). Therefore, if you want to force to always use RAM to improve performance you should set a very high threshold, on the contrary, if you want a minimum memory usage at the cost of poor performance, set it to `0`. Default `None`.
+        - `MIN_PASSWORD_LEN`: minimum required length for user passwords when users update their profile. If the provided password is shorter than this length, the update is rejected. Default `8`.
     - PostgreSQL:
         - `POSTGRES_USERNAME`: PostgreSQL connection username. **Must be equal to** `POSTGRES_USER`.
         - `POSTGRES_PASSWORD`: PostgreSQL connection password. **Must be equal to** `POSTGRES_PASSWORD`.
@@ -179,7 +180,7 @@ To integrate with [Modulector][modulector] and/or [BioAPI][bioapi] using `docker
                name: 'multiomix-network'
    ```
 3. The new versions of BioAPI and Modulector already come with service names suitable for integration with Multiomix. But **if you have any old version of those platforms**, change the Modulector and BioAPI configuration so that it does not conflict with the Multiomix configuration:
-   1. Rename all the services in the Modulector and BioAPI `docker-compose.yml` files with the suffix `_modulector` and `_bioapi`. And rename `web` service to `modulector` or `bioapi` respectively. **NOTE:** do not forget to rename the `depends_on` parameters, and the database connection parameters to point to the new services names.
+   1. Rename all the services in the Modulector and BioAPI `docker-compose.yml` files by adding the suffix `_modulector` or `_bioapi` respectively. For example, `mongo_bioapi`, `web_bioapi` and `nginx_bioapi` in the case of BioAPI. **NOTE:** do not forget to update the `depends_on` parameters and the database connection parameters so that they point to the new service names.
    2. Change the following block in the NGINX configuration files. In Modulector it's `config/nginx/conf.d/modulector.conf`, in BioAPI it's `/nginx/conf.d/default.conf`:
    ```
    # Old
@@ -191,7 +192,7 @@ To integrate with [Modulector][modulector] and/or [BioAPI][bioapi] using `docker
    # New
    upstream web {
      ip_hash;
-     server modulector:8000; # Or bioapi, dependening on which config file you're 
+     server web_modulector:8000; # Or web_bioapi, depending on which config file you're editing
    }
    ```
 4. Set Multiomix parameters:

diff --git a/src/api_service/migrations/0061_alter_experiment_shared_users.py b/src/api_service/migrations/0061_alter_experiment_shared_users.py
@@ -0,0 +1,20 @@
+# Generated by Django 4.2.15 on 2024-10-23 13:51
+
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Alters Experiment.shared_users: optional M2M to the user model (related_name 'shared_users_correlation_analysis')."""
+    dependencies = [
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+        ('api_service', '0060_experiment_shared_users'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='experiment',
+            name='shared_users',
+            field=models.ManyToManyField(blank=True, related_name='shared_users_correlation_analysis', to=settings.AUTH_USER_MODEL),
+        ),
+    ]
diff --git a/src/api_service/mrna_service.py b/src/api_service/mrna_service.py
@@ -13,10 +13,28 @@ class MRNAService(object):
 
     def __init__(self):
         modulector_settings = settings.MODULECTOR_SETTINGS
-        self.url_modulector_prefix = f"http://{modulector_settings['host']}:{modulector_settings['port']}"
+        self.url_modulector_prefix = self.__build_url(modulector_settings)
 
         bioapi_settings = settings.BIOAPI_SETTINGS
-        self.url_bioapi_prefix = f"http://{bioapi_settings['host']}:{bioapi_settings['port']}"
+        self.url_bioapi_prefix = self.__build_url(bioapi_settings)
+
+    @staticmethod
+    def __build_url(settings: Dict[str, Any]) -> str:
+        """
+        Constructs the base URL (protocol, host and optional port) from the settings provided.
+        The port is omitted when it is the default one for the protocol (80 for http, 443 for https).
+        The port is compared as a string so that both int values and string values (e.g. '80') match.
+        @param settings: Dictionary with the 'protocol', 'host' and 'port' keys.
+        @return: Constructed URL as a string.
+        """
+        protocol = settings['protocol']
+        host = settings['host']
+        port = settings['port']
+
+        # str() makes 80 and '80' equivalent: a port read from an environment variable is a string
+        if (protocol, str(port)) in (('http', '80'), ('https', '443')):
+            return f"{protocol}://{host}"
+        return f"{protocol}://{host}:{port}"
 
     @staticmethod
     def __generate_rest_query_params(get_request: QueryDict) -> str:

diff --git a/src/api_service/websocket_functions.py b/src/api_service/websocket_functions.py
@@ -106,3 +106,25 @@ def send_update_cluster_label_set_command(user_id: int):
         'command': 'update_cluster_labels_sets'
     }
     send_message(user_group_name, message)
+
+def send_update_institutions_command(user_id: int):
+    """
+    Sends a WS message with the 'update_institutions' command to the notifications group of a user
+    @param user_id: ID of the user whose 'notifications_<user_id>' group receives the WS message
+    """
+    user_group_name = f'notifications_{user_id}'
+    message = {
+        'command': 'update_institutions'
+    }
+    send_message(user_group_name, message)
+
+def send_update_user_for_institution_command(user_id: int):
+    """
+    Sends a WS message with the 'update_user_for_institution' command to the notifications group of a user
+    @param user_id: ID of the user whose 'notifications_<user_id>' group receives the WS message
+    """
+    user_group_name = f'notifications_{user_id}'
+    message = {
+        'command': 'update_user_for_institution'
+    }
+    send_message(user_group_name, message)
diff --git a/src/biomarkers/migrations/0019_biomarker_is_public.py b/src/biomarkers/migrations/0019_biomarker_is_public.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.15 on 2024-11-21 18:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Adds the boolean 'is_public' field (default False) to the Biomarker model."""
+    dependencies = [
+        ('biomarkers', '0018_alter_biomarker_state'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='biomarker',
+            name='is_public',
+            field=models.BooleanField(default=False),
+        ),
+    ]
diff --git a/src/biomarkers/migrations/0020_biomarker_shared_institutions.py b/src/biomarkers/migrations/0020_biomarker_shared_institutions.py
@@ -0,0 +1,19 @@
+# Generated by Django 4.2.15 on 2024-11-21 18:36
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Adds the optional 'shared_institutions' M2M from Biomarker to institutions.Institution (related_name 'biomarkers')."""
+    dependencies = [
+        ('institutions', '0004_auto_20220923_2322'),
+        ('biomarkers', '0019_biomarker_is_public'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='biomarker',
+            name='shared_institutions',
+            field=models.ManyToManyField(blank=True, related_name='biomarkers', to='institutions.institution'),
+        ),
+    ]
diff --git a/src/biomarkers/models.py b/src/biomarkers/models.py
@@ -3,6 +3,8 @@
 from django.db import models
 from django.db.models import QuerySet
 from queryset_sequence import QuerySetSequence
+
+from institutions.models import Institution
 from tags.models import Tag
 from api_service.websocket_functions import send_update_biomarkers_command
 from user_files.models_choices import MoleculeType
@@ -57,17 +59,19 @@ class Biomarker(models.Model):
     statistical_validations: QuerySet['statistical_properties.StatisticalValidation']
     inference_experiments: QuerySet['inferences.InferenceExperiment']
     methylations: QuerySet['MethylationIdentifier']
+    trained_models: QuerySet['trained_models.TrainedModel']
     cnas: QuerySet['CNAIdentifier']
     mirnas: QuerySet['MiRNAIdentifier']
     mrnas: QuerySet['MRNAIdentifier']
-
+    is_public = models.BooleanField(blank=False, null=False, default=False)
     name: str = models.CharField(max_length=300)
     description: Optional[str] = models.TextField(null=True, blank=True)
     tag: Optional[Tag] = models.ForeignKey(Tag, on_delete=models.SET_NULL, default=None, blank=True, null=True)
     upload_date: Optional[models.DateTimeField] = models.DateTimeField(auto_now_add=True, blank=False, null=True)
     origin: int = models.IntegerField(choices=BiomarkerOrigin.choices)
     state: int = models.IntegerField(choices=BiomarkerState.choices)
     user = models.ForeignKey(get_user_model(), on_delete=models.CASCADE)
+    shared_institutions = models.ManyToManyField(Institution, related_name='biomarkers', blank=True)
 
     def __str__(self) -> str:
         return self.name

diff --git a/src/biomarkers/serializers.py b/src/biomarkers/serializers.py
@@ -1,4 +1,5 @@
 from rest_framework import serializers
+from genes.serializers import GeneGEMWithType
 from user_files.models_choices import MoleculeType
 from .models import Biomarker, MRNAIdentifier, MethylationIdentifier, CNAIdentifier, MiRNAIdentifier, MoleculeIdentifier
 from tags.serializers import TagSerializer
@@ -164,5 +165,14 @@ def get_was_already_used(ins: Biomarker) -> bool:
         This avoids the user to edit a Biomarker that was already used and generate inconsistencies.
         """
         return ins.was_already_used
+
+
+class BiomarkerFromCorrelationAnalysisSerializer(serializers.Serializer):
+    """
+    Serializer for the BiomarkerFromCorrelationAnalysis payload: a required correlation analysis ID, plus an optional gene GEM list and an optional correlation threshold.
+    """
+    correlation_analysis_id = serializers.IntegerField()
+    gene_gem_list = GeneGEMWithType(many=True, required=False)
+    correlation_threshold = serializers.IntegerField(required=False)  # NOTE(review): only accepts integers; confirm a FloatField is not needed for fractional thresholds
 
 
diff --git a/src/biomarkers/urls.py b/src/biomarkers/urls.py
@@ -18,4 +18,5 @@
     path('methylation-sites', views.MethylationSites.as_view(), name='methylation_sites'),
     path('methylation-sites-finder', views.MethylationSites.as_view(), name='methylation_sites_finder'),
     path('biomarker-molecules', views.BiomarkerMolecules.as_view(), name='biomarker_molecules'),
+    path('biomarker-correlation-api', views.BiomarkerCorrelationAPIView.as_view(), name='biomarker_correlation_api'),
 ]
diff --git a/src/biomarkers/views.py b/src/biomarkers/views.py
@@ -10,21 +10,26 @@
 from rest_framework.request import Request
 from rest_framework.response import Response
 from rest_framework.views import APIView
+from api_service.models import Experiment
 from api_service.mrna_service import global_mrna_service
 from biomarkers.models import Biomarker, BiomarkerState, BiomarkerOrigin, MoleculeIdentifier
-from biomarkers.serializers import BiomarkerSerializer, MoleculeIdentifierSerializer, \
+from biomarkers.serializers import BiomarkerFromCorrelationAnalysisSerializer, BiomarkerSerializer, MoleculeIdentifierSerializer, \
     BiomarkerSimpleSerializer, BiomarkerSimpleUpdateSerializer
 from common.pagination import StandardResultsSetPagination
 from common.response import generate_json_response_or_404
-from django.db.models import QuerySet
+from django.db.models import QuerySet, Q
+
 
 
 class BiomarkerList(generics.ListAPIView):
     """REST endpoint: list for Biomarker model"""
 
     def get_queryset(self):
+        user = self.request.user
         only_successful = self.request.GET.get('onlySuccessful') == 'true'
-        biomarkers = Biomarker.objects.filter(user=self.request.user)
+        biomarkers = Biomarker.objects.filter(
+            Q(is_public=True) | Q(user=user) | Q(shared_institutions__institutionadministration__user=user)).distinct()
+
         if only_successful:
             # FIXME: this is VERY slow. Taking more than 20secs in production. Must parametrize the DB, maybe
             # FIXME: autovacuum settings could help
@@ -112,7 +117,6 @@ def get(self, request: Request, pk: int):
             self.__copy_molecules_instances(biomarker_copy, biomarker.cnas.all())
             self.__copy_molecules_instances(biomarker_copy, biomarker.methylations.all())
 
-
         return Response({'ok': True})
 
 
@@ -142,6 +146,7 @@ def get_gene_aliases(genes_ids: List[str]) -> Optional[Dict]:
         method='post'
     )
 
+
 def find_genes_from_request(request: Request) -> List[Dict]:
     """
     Generates the structure for the frontend for a list of genes. The needed structure is a list of dicts with
@@ -273,3 +278,22 @@ def get_queryset(self):
     filter_backends = [filters.OrderingFilter, filters.SearchFilter, DjangoFilterBackend]
     search_fields = ['identifier']
     ordering_fields = ['identifier']
+
+
+class BiomarkerCorrelationAPIView(APIView):
+    """REST endpoint: validates the request data and retrieves the corresponding Experiment (correlation analysis)."""
+
+    def post(self, request, *args, **kwargs):
+        """Validates the payload, fetches the Experiment by 'correlation_analysis_id' and responds with {'ok': True}."""
+        serializer = BiomarkerFromCorrelationAnalysisSerializer(data=request.data)
+
+        # Validate the data (returns a 400 error if the structure is incorrect)
+        serializer.is_valid(raise_exception=True)
+        validated_data = serializer.validated_data
+
+        # NOTE(review): no Biomarker is created yet; the Experiment is only fetched (404 if missing) and left unused.
+        cor_analysis = get_object_or_404(Experiment, pk=validated_data['correlation_analysis_id'])
+        # NOTE(review): the lookup is not restricted to request.user; confirm access control before building on it.
+        return Response({
+            "ok": True,
+        })
diff --git a/src/feature_selection/fs_algorithms_spark.py b/src/feature_selection/fs_algorithms_spark.py
@@ -32,6 +32,10 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) ->
     """Gets the corresponding string value for the parameter 'clustering-algorithm' of the EMR integration."""
     if cluster_algorithm == ClusteringAlgorithm.SPECTRAL:
         return 'spectral'
+    if cluster_algorithm == ClusteringAlgorithm.BK_MEANS:
+        return 'bk_means'
+    if cluster_algorithm == ClusteringAlgorithm.WARD:
+        return 'ward'
     return 'k_means'  # Default is kmeans
 
 

diff --git a/src/feature_selection/fs_models.py b/src/feature_selection/fs_models.py
@@ -1,6 +1,6 @@
 from typing import Literal, Union, Optional
 from django.conf import settings
-from sklearn.cluster import KMeans, SpectralClustering
+from sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering
 from sksurv.ensemble import RandomSurvivalForest
 from sksurv.svm import FastKernelSurvivalSVM
 from .models import ClusteringAlgorithm
@@ -12,7 +12,7 @@
 SVMOptimizerOptions = Literal["avltree", "rbtree"]
 
 # Available models for clustering
-ClusteringModels = Union[KMeans, SpectralClustering]
+ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans, AgglomerativeClustering]
 
 
 def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
@@ -28,6 +28,10 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm,
         return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto')
     elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL:
         return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state)
+    elif clustering_algorithm == ClusteringAlgorithm.WARD:
+        return AgglomerativeClustering(n_clusters=number_of_clusters, linkage='ward')
+    elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS:
+        return BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state)
 
     raise Exception(f'Invalid clustering_algorithm parameter: {clustering_algorithm}')
 

diff --git a/src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py b/src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py
@@ -0,0 +1,23 @@
+# Generated by Django 4.2.15 on 2024-10-23 13:51
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Sets the 'algorithm' choices of ClusteringParameters and ClusteringTimesRecord to K Means, Spectral and Bk Means."""
+    dependencies = [
+        ('feature_selection', '0055_alter_fsexperiment_app_name_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='clusteringparameters',
+            name='algorithm',
+            field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means')], default=1),
+        ),
+        migrations.AlterField(
+            model_name='clusteringtimesrecord',
+            name='algorithm',
+            field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means')]),
+        ),
+    ]
diff --git a/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py b/src/feature_selection/migrations/0057_alter_clusteringparameters_algorithm_and_more.py
@@ -0,0 +1,23 @@
+# Generated by Django 4.2.15 on 2025-01-02 20:33
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Sets the 'algorithm' choices of ClusteringParameters and ClusteringTimesRecord to K Means, Spectral, Bk Means and Ward."""
+    dependencies = [
+        ('feature_selection', '0056_alter_clusteringparameters_algorithm_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='clusteringparameters',
+            name='algorithm',
+            field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')], default=1),
+        ),
+        migrations.AlterField(
+            model_name='clusteringtimesrecord',
+            name='algorithm',
+            field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means'), (4, 'Ward')]),
+        ),
+    ]
diff --git a/src/feature_selection/models.py b/src/feature_selection/models.py
@@ -33,6 +33,8 @@ class ClusteringAlgorithm(models.IntegerChoices):
     """Clustering algorithm."""
     K_MEANS = 1
     SPECTRAL = 2  # TODO: implement in backend
+    BK_MEANS = 3
+    WARD = 4
 
 
 class ClusteringMetric(models.IntegerChoices):

diff --git a/src/feature_selection/views.py b/src/feature_selection/views.py
@@ -209,7 +209,15 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering
         parameters_desc = row['parameters']
         params = parameters_desc.split('_')
         number_of_clusters, algorithm_description, scoring_method = params[0], params[2], params[4]
-        algorithm = ClusteringAlgorithm.K_MEANS if algorithm_description == 'k-means' else ClusteringAlgorithm.SPECTRAL
+        # Maps the algorithm description to its enum value; any unrecognized description falls back to BK_MEANS
+        if algorithm_description == 'k-means':
+            algorithm = ClusteringAlgorithm.K_MEANS
+        elif algorithm_description == 'spectral':
+            algorithm = ClusteringAlgorithm.SPECTRAL
+        elif algorithm_description == 'ward':
+            algorithm = ClusteringAlgorithm.WARD
+        else:
+            algorithm = ClusteringAlgorithm.BK_MEANS
         scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \
             else ClusteringScoringMethod.LOG_LIKELIHOOD
         return number_of_clusters, algorithm, scoring