Skip to content

Collection of structures & TMAlign #143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ba7f46d
added possibility of collection of structures and function to work on …
gezmi Apr 30, 2024
5642c20
added testing for cases when error is raised
gezmi Apr 30, 2024
cd9b14e
fixing import of stack
gezmi May 1, 2024
b74265b
initial implementation of tmalign
gezmi May 1, 2024
1650bba
installation of package with TMalign (USalign)
gezmi May 2, 2024
a2c0476
added accidentally deleted test data
gezmi May 2, 2024
93f8600
tmalign working and tested
gezmi May 2, 2024
b3c0c7f
stack writing test updated with absolute paths, so they do not fail …
gezmi May 2, 2024
407ef44
deleted stack import to prevent circular import
gezmi May 2, 2024
164701f
modify, so that it does not need stack import for checking type of va…
gezmi May 2, 2024
82fc69f
tmalign inside alignment works
gezmi May 2, 2024
d569cab
changed type of equality check
gezmi May 2, 2024
541bea7
removed unnecessary print
gezmi May 2, 2024
7c5cd8d
added new line
gezmi May 2, 2024
55c4205
corrected typos in functions' names
gezmi May 2, 2024
35bb8f7
added clean up after temporary matrix file
gezmi May 2, 2024
2b29780
added possibility to specify target when aligning stack
gezmi May 2, 2024
f5e4ddc
added tutorials and necessary files for TMalign and stack
gezmi May 6, 2024
2d1c0c0
installation requires TMalign
gezmi May 6, 2024
29fa124
added TMalign and stacks to changelog
gezmi May 6, 2024
6ad051e
rearranged tmalign of stacks, updated tests
gezmi May 6, 2024
ccf56ba
changed version
gezmi May 6, 2024
664111f
updated installation with usalign
gezmi May 6, 2024
9a20af5
added api docs and tutorial md
gezmi May 6, 2024
7b52570
updated gitignore
gezmi May 6, 2024
a6cd5ed
Merge branch 'main' into stack_tmalign
a-r-j Jul 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ docs/tutorials/Working_with_mmCIF_Structures_in_DataFrames_files/*
docs/tutorials/Working_with_MOL2_Structures_in_DataFrames_files/*
docs/tutorials/Working_with_PDB_Structures_in_DataFrames_files/*
docs/tutorials/data/3eiy_stripped.pdb.gz
docs/tutorials/data/*output/
docs/tutorials/data/aligned_structures/
docs/api_subpackages/
docs/api_modules
docs/py-docstring-parser/
Expand Down Expand Up @@ -35,3 +37,5 @@ doc/_build
*ENV
.DS_store
.idea
/biopandas/align/USalign.exe
/biopandas/align/USalign
2 changes: 1 addition & 1 deletion biopandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#

__version__ = "0.5.1dev"
__version__ = "0.6.0dev"
__author__ = "Sebastian Raschka <[email protected]>"
15 changes: 15 additions & 0 deletions biopandas/align/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# BioPandas
# Author: Sebastian Raschka <[email protected]>
# License: BSD 3 clause
# Project Website: http://rasbt.github.io/biopandas/
# Code Repository: https://github.com/rasbt/biopandas

"""
BioPandas module for aligning
Protein Data Bank (PDB) structures.
"""

from .align import Align
from .tmalign import TMAlign

# Names re-exported as the public API of the align subpackage
# (both are imported from their submodules above).
__all__ = ['Align', 'TMAlign']
52 changes: 52 additions & 0 deletions biopandas/align/align.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
""" Class for aligning PDB structures"""

# BioPandas
# Author: Sebastian Raschka <[email protected]>
# License: BSD 3 clause
# Project Website: http://rasbt.github.io/biopandas/
# Code Repository: https://github.com/rasbt/biopandas

import tempfile
from copy import deepcopy

import numpy as np


class Align():
    """Shared helpers for aligning PandasPdb structures.

    The class holds no state. It bundles three utilities: writing a structure
    to a temporary PDB file, restricting a structure to a single chain, and
    applying a rotation plus translation to a set of coordinates.
    """

    def __init__(self):
        pass

    def write_pdb_to_temp_file(self, pdb):
        """Write a PandasPdb object's data to a temporary PDB file and return the file handle.

        The file is created with ``delete=False``, so it is NOT removed when
        the handle is closed; the caller is responsible for closing the handle
        and deleting ``temp_file.name`` once it is no longer needed.

        :param pdb: the PandasPdb object to write to the file.

        :return: file handle
        """
        import os

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdb')
        try:
            pdb.to_pdb(path=temp_file.name, records=None, gz=False, append_newline=True)
        except Exception:
            # The caller never receives the handle on failure, so nobody else
            # could clean up: close it and remove the file before re-raising.
            temp_file.close()
            os.unlink(temp_file.name)
            raise
        return temp_file

    def filter_and_validate_chain(self, pdb, chain_id):
        """Filter the PandasPdb by chain_id and validate the presence of the chain.

        The input object is left untouched; a deep copy carrying only the
        matching ATOM records is returned.

        :param pdb: the PandasPdb object to filter.
        :param chain_id: the chain ID to filter by.

        :return: filtered_pdb
        :raises ValueError: if no ATOM record belongs to ``chain_id``.
        """
        atoms = pdb.df['ATOM']
        filtered_atoms = atoms[atoms['chain_id'].isin([chain_id])]
        # Validate before copying so a missing chain does not pay for a
        # deep copy of the whole structure.
        if filtered_atoms.empty:
            raise ValueError(f"No such chain '{chain_id}' found in the structure.")

        filtered_pdb = deepcopy(pdb)
        # Store an independent frame so later in-place edits of the result
        # (e.g. overwriting coordinates) neither warn about chained
        # assignment nor depend on the source frame.
        filtered_pdb.df['ATOM'] = filtered_atoms.copy()
        return filtered_pdb

    def transform(self, coords, matrix, translation):
        """Apply the rotation matrix and translation vector to the structure.

        Each row of ``coords`` is treated as one point ``p`` and mapped to
        ``matrix @ p + translation``; in row-vector form this is
        ``coords . matrix^T + translation``. Array-likes such as nested lists
        are accepted for all three arguments.

        :param coords: the coordinates to transform.
        :param matrix: the rotation matrix.
        :param translation: the translation vector.

        :return: transformed coordinates as a numpy array.
        """
        # asanyarray lets plain sequences through (they have no ``.T``)
        # while leaving existing ndarrays and their subclasses untouched.
        rotation = np.asanyarray(matrix)
        return np.dot(coords, rotation.T) + translation
Empty file.
1,911 changes: 1,911 additions & 0 deletions biopandas/align/tests/data/2d7t.pdb

Large diffs are not rendered by default.

27,811 changes: 27,811 additions & 0 deletions biopandas/align/tests/data/2jyf.pdb

Large diffs are not rendered by default.

2,147 changes: 2,147 additions & 0 deletions biopandas/align/tests/data/3eiy.pdb

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions biopandas/align/tests/data/4eiy_anisouchunk.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
HEADER MEMBRANE PROTEIN 06-APR-12 4EIY
ATOM 101 CG1 VAL A 12 -5.222 2.059 3.696 1.00 17.98 C
ANISOU 101 CG1 VAL A 12 2231 2335 2262 -42 326 285 C
ATOM 102 CG2 VAL A 12 -5.748 3.603 1.839 1.00 16.30 C
ANISOU 102 CG2 VAL A 12 2409 2061 1722 37 584 238 C
ATOM 103 N GLU A 13 -8.338 2.518 5.470 1.00 16.24 N
ANISOU 103 N GLU A 13 2362 2337 1469 -17 0 0 N
ATOM 104 CA GLU A 13 -8.773 1.934 6.759 1.00 15.34 C
ANISOU 104 CA GLU A 13 2354 2186 1285 -33 -18 -33 C
ATOM 105 C GLU A 13 -9.966 1.026 6.560 1.00 16.90 C
13 changes: 13 additions & 0 deletions biopandas/align/tests/data/tmalign_output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
------ The rotation matrix to rotate Structure_1 to Structure_2 ------
m t[m] u[m][0] u[m][1] u[m][2]
0 -7.8641206905 0.9999934600 0.0029980568 -0.0020227599
1 -28.5356545158 -0.0030052335 0.9999891677 -0.0035542976
2 -45.4038063602 0.0020120820 0.0035603533 0.9999916377

Code for rotating Structure 1 from (x,y,z) to (X,Y,Z):
for(i=0; i<L; i++)
{
X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];
Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];
Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];
}
55 changes: 55 additions & 0 deletions biopandas/align/tests/test_align.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# BioPandas
# Author: Sebastian Raschka <[email protected]>
# License: BSD 3 clause
# Project Website: http://rasbt.github.io/biopandas/
# Code Repository: https://github.com/rasbt/biopandas

from nose.tools import assert_raises
from biopandas.pdb import PandasPdb
from biopandas.align import Align
import numpy as np
import os

# Directory next to this test module that holds the PDB fixtures.
_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")

# Input structures read by the tests.
TESTDATA_FILENAME = os.path.join(_DATA_DIR, "3eiy.pdb")
TESTDATA_FILENAME2 = os.path.join(_DATA_DIR, "4eiy_anisouchunk.pdb")

# Scratch output path inside the fixture directory.
OUTFILE = os.path.join(_DATA_DIR, "tmp.pdb")

def test_write_pdb_to_temp_file():
    """A structure written to a temporary PDB file matches the file it was read from."""
    ppdb = PandasPdb()
    ppdb.read_pdb(TESTDATA_FILENAME)

    align = Align()

    temp_file = align.write_pdb_to_temp_file(ppdb)
    try:
        # Leaving the ``with`` closes the handle; the file itself stays on
        # disk and is compared below.
        with temp_file:
            assert os.path.exists(temp_file.name)
            assert temp_file.name.endswith('.pdb')

        # Context managers close both files even if the assertion fails.
        with open(temp_file.name, 'r') as written, \
                open(TESTDATA_FILENAME, 'r') as expected:
            assert written.read() == expected.read()
    finally:
        # Closing the handle did not delete the file, so remove it here to
        # avoid leaving a stray temp file after every test run.
        os.remove(temp_file.name)

def test_transform():
    """An identity rotation combined with a translation only shifts the points."""
    points = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    identity = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    shift = np.array([3, 2, 1])
    expected = np.array([[4, 4, 4], [7, 7, 7], [10, 10, 10]])

    result = Align().transform(points, identity, shift)

    assert np.array_equal(result, expected)

def test_filter_and_validate_chain():
    """Filtering keeps only the requested chain and rejects an unknown chain ID."""
    ppdb = PandasPdb()
    ppdb.read_pdb(TESTDATA_FILENAME)

    align = Align()

    filtered_pdb = align.filter_and_validate_chain(ppdb, 'A')
    # Compare as a plain list: comparing the array returned by ``unique()``
    # with a list is element-wise, and asserting on that result raises an
    # ambiguous-truth error as soon as more than one chain is present.
    assert filtered_pdb.df['ATOM']['chain_id'].unique().tolist() == ['A']

    assert_raises(ValueError, align.filter_and_validate_chain, ppdb, 'B')


Loading
Loading