Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,192 changes: 2,192 additions & 0 deletions integration_tests/golden_data/protprot_complex_1.pdb

Large diffs are not rendered by default.

2,192 changes: 2,192 additions & 0 deletions integration_tests/golden_data/protprot_complex_2.pdb

Large diffs are not rendered by default.

114 changes: 114 additions & 0 deletions integration_tests/test_clustfcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import os
import shutil
import tempfile
from pathlib import Path

import pytest

from haddock.libs.libontology import PDBFile
from haddock.modules.analysis.clustfcc import DEFAULT_CONFIG as clustfcc_pars
from haddock.modules.analysis.clustfcc import HaddockModule as ClustFCCModule

from . import golden_data


class MockPreviousIO:
    """Stand-in for the previous module's IO, feeding golden-data models to clustfcc."""

    def __init__(self, path):
        # Working directory the module under test reads from / writes to.
        self.path = path

    def retrieve_models(self, individualize: bool = False):
        """Copy both golden complexes into the work dir and wrap them as PDBFile models."""
        model_names = (
            "protprot_complex_1.pdb",
            "protprot_complex_2.pdb",
        )
        for name in model_names:
            shutil.copy(Path(golden_data, name), Path(self.path, name))
        return [
            PDBFile(file_name=name, path=self.path) for name in model_names
        ]

    def output(self) -> None:
        """Placeholder for the output method"""
        return None


@pytest.fixture(name="output_list")
def fixture_output_list():
    """Names of every file a clustfcc run is expected to produce."""
    expected_files = (
        "fcc.matrix",
        "cluster.out",
        "protprot_complex_1.con",
        "protprot_complex_2.con",
        "clustfcc.txt",
        "io.json",
        "clustfcc.tsv",
        "fcc_matrix.html",
    )
    # Return a list to match the original fixture's return type exactly.
    return list(expected_files)


@pytest.fixture(name="fcc_module")
def fixture_fcc_module():
    """Yield a clustfcc module rooted in a throwaway directory (cleaned up on exit)."""
    with tempfile.TemporaryDirectory() as tempdir:
        module = ClustFCCModule(
            order=1,
            path=Path(tempdir),
            initial_params=clustfcc_pars,
        )
        yield module


def test_clustfcc(fcc_module, output_list):
    """Run clustfcc end-to-end on two complexes and validate its output files."""
    fcc_module.previous_io = MockPreviousIO(path=fcc_module.path)
    fcc_module.params["plot_matrix"] = True

    fcc_module.run()

    # Every expected output file must exist and be non-empty.
    for fname in output_list:
        produced = Path(fcc_module.path, fname)
        assert produced.exists()
        assert produced.stat().st_size > 0

    # The FCC matrix holds a single line describing the 1-2 model pair.
    # NOTE(review): the file is read in text mode (universal newlines), so
    # comparing against os.linesep presumably only holds where it is "\n" —
    # confirm on Windows.
    with open(Path(fcc_module.path, "fcc.matrix"), encoding="utf-8", mode="r") as fh:
        matrix_content = fh.read()
    assert matrix_content == "1 2 0.05 0.062" + os.linesep

    # Each .con contact file must contain the known number of contact lines.
    con_expectations = {
        "protprot_complex_1.con": 100,
        "protprot_complex_2.con": 119,
    }
    for con_name, n_lines in con_expectations.items():
        with open(Path(fcc_module.path, con_name), encoding="utf-8", mode="r") as fh:
            assert len(fh.read().splitlines()) == n_lines

    # cluster.out lists both clusters, each with its single member.
    expected_cluster_lines = [
        "Cluster 1 -> 2 " + os.linesep,
        "Cluster 2 -> 1 " + os.linesep,
    ]
    with open(Path(fcc_module.path, "cluster.out"), encoding="utf-8", mode="r") as fh:
        for want, got in zip(expected_cluster_lines, fh.readlines()):
            assert got == want
66 changes: 30 additions & 36 deletions src/haddock/modules/analysis/clustfcc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pathlib import Path

from fcc.scripts import calc_fcc_matrix, cluster_fcc

from haddock import FCC_path, log
from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Union
Expand All @@ -22,15 +23,15 @@
write_structure_list,
)
from haddock.libs.libsubprocess import JobInputFirst
from haddock.modules import get_engine
from haddock.modules import BaseHaddockModule, read_from_yaml_config
from haddock.modules import BaseHaddockModule, get_engine, read_from_yaml_config
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.clustfcc.clustfcc import (
get_cluster_centers,
iterate_clustering,
write_clusters,
write_clustfcc_file,
)
from haddock.modules.analysis import get_analysis_exec_mode


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
Expand All @@ -42,11 +43,11 @@ class HaddockModule(BaseHaddockModule):
name = RECIPE_PATH.name

def __init__(
self,
order: int,
path: Path,
initial_params: Union[Path, str] = DEFAULT_CONFIG,
) -> None:
self,
order: int,
path: Path,
initial_params: Union[Path, str] = DEFAULT_CONFIG,
) -> None:
super().__init__(order, path, initial_params)

@classmethod
Expand Down Expand Up @@ -81,9 +82,9 @@ def _run(self) -> None:
contact_f,
contact_executable,
self.params["contact_distance_cutoff"],
)
)
contact_jobs.append(job)

exec_mode = get_analysis_exec_mode(self.params["mode"])

Engine = get_engine(exec_mode, self.params)
Expand All @@ -104,22 +105,19 @@ def _run(self) -> None:

if not_found:
# No contacts were calculated, we cannot cluster
self.finish_with_error(
"Several files were not generated:"
f" {not_found}"
)
self.finish_with_error("Several files were not generated:" f" {not_found}")

log.info("Calculating the FCC matrix")
parsed_contacts = calc_fcc_matrix.parse_contact_file(
contact_file_l,
False,
)
)

# Important: matrix is a generator object, be careful with it
matrix = calc_fcc_matrix.calculate_pairwise_matrix(
parsed_contacts,
False,
)
)

# write the matrix to a file, so we can read it afterwards and don't
# need to reinvent the wheel handling this
Expand All @@ -136,26 +134,26 @@ def _run(self) -> None:
fcc_matrix_f,
self.params["clust_cutoff"],
self.params["strictness"],
)
)

# iterate clustering until at least one cluster is found
clusters, min_population = iterate_clustering(
pool,
self.params['min_population'],
)
self.params['min_population'] = min_population
self.params["min_population"],
)
self.params["min_population"] = min_population

# Prepare output and read the elements
if clusters:
# Write the clusters
write_clusters(clusters)

# Get the cluster centers
clt_dic, clt_centers = get_cluster_centers(
clusters,
models_to_clust,
)
)

# ranking clusters
_scores, sorted_score_dic = rank_clusters(clt_dic, min_population)

Expand All @@ -167,45 +165,41 @@ def _run(self) -> None:
models_to_clust,
self.output_models,
out_fname="clustfcc.tsv",
)
)

write_clustfcc_file(
clusters,
clt_centers,
clt_dic,
self.params,
sorted_score_dic
)
clusters, clt_centers, clt_dic, self.params, sorted_score_dic
)
else:
log.warning("No clusters were found")
self.output_models = models_to_clust # type: ignore

# Draw the matrix
if self.params['plot_matrix']:
if self.params["plot_matrix"]:
# Obtain final models indices
final_order_idx, labels, cluster_ids = [], [], []
for pdb in self.output_models:
final_order_idx.append(models_to_clust.index(pdb))
labels.append(pdb.file_name.replace('.pdb', ''))
labels.append(pdb.file_name.replace(".pdb", ""))
cluster_ids.append(pdb.clt_id)
# Get custom cluster data
matrix_cluster_dt, cluster_limits = get_cluster_matrix_plot_clt_dt(
cluster_ids
)
)

# Define output filename
html_matrix_basepath = 'fcc_matrix'
html_matrix_basepath = "fcc_matrix"
# Plot matrix
html_matrixpath = plot_cluster_matrix(
fcc_matrix_f,
final_order_idx,
labels,
dttype='FCC',
dttype="FCC",
diag_fill=1,
output_fname=html_matrix_basepath,
matrix_cluster_dt=matrix_cluster_dt,
cluster_limits=cluster_limits,
)
)
log.info(f"Plotting matrix in {html_matrixpath}")

# Export models for next module
Expand Down
Loading