diff --git a/doc/apidoc.json b/doc/apidoc.json
index b68573551..0d3eb7798 100644
--- a/doc/apidoc.json
+++ b/doc/apidoc.json
@@ -356,7 +356,6 @@
             "set_component",
             "list_assemblies",
             "get_assembly"
-
         ],
         "CIF format" : [
             "CIFFile",
@@ -382,5 +381,17 @@
             "StringArrayEncoding",
             "TypeCode"
         ]
+    },
+    "biotite.structure.alphabet" : {
+        "Structural alphabets": [
+            "I3DSequence",
+            "ProteinBlocksSequence",
+            "ClepapsSequence"
+        ],
+        "Conversion functions": [
+            "to_3di",
+            "to_protein_blocks",
+            "to_clepaps"
+        ]
     }
 }
diff --git a/doc/references.bib b/doc/references.bib
index 4a08c984c..bfa6b180f 100644
--- a/doc/references.bib
+++ b/doc/references.bib
@@ -742,7 +742,7 @@ @article{Steele2021
   eprint = {2001.05304},
   primaryclass = {cs},
   doi = {10.48550/arXiv.2001.05304},
-  archiveprefix = {arxiv}
+  archiveprefix = {arXiv}
 }
 
 @article{Steinegger2017,
@@ -838,6 +838,35 @@ @article{VanHerk1992
   doi = {10.1016/0167-8655(92)90069-C}
 }
 
+@article{VanKempen2024,
+  title = {Fast and Accurate Protein Structure Search with {{Foldseek}}},
+  author = {{van Kempen}, Michel and Kim, Stephanie S. and Tumescheit, Charlotte and Mirdita, Milot and Lee, Jeongjae and Gilchrist, Cameron L. M. and Söding, Johannes and Steinegger, Martin},
+  year = {2024},
+  month = feb,
+  journal = {Nature Biotechnology},
+  volume = {42},
+  number = {2},
+  pages = {243--246},
+  publisher = {Nature Publishing Group},
+  issn = {1546-1696},
+  doi = {10.1038/s41587-023-01773-0}
+}
+
+@article{Wang2008,
+  title = {{{CLePAPS}}: {{FAST PAIR ALIGNMENT OF PROTEIN STRUCTURES BASED ON CONFORMATIONAL LETTERS}}},
+  shorttitle = {{{CLePAPS}}},
+  author = {Wang, Sheng and Zheng, Wei-Mou},
+  year = {2008},
+  month = apr,
+  journal = {Journal of Bioinformatics and Computational Biology},
+  volume = {06},
+  number = {02},
+  pages = {347--366},
+  publisher = {World Scientific Publishing Co.},
+  issn = {0219-7200},
+  doi = {10.1142/S0219720008003461}
+}
+
 @article{Westbrook2015,
   title = {The Chemical Component Dictionary: Complete Descriptions of Constituent Molecules in Experimentally Determined {{3D}} Macromolecules in the {{Protein Data Bank}}},
   shorttitle = {The Chemical Component Dictionary},
diff --git a/src/biotite/sequence/align/matrix.py b/src/biotite/sequence/align/matrix.py
index 2a7d23437..fd5f02e9a 100644
--- a/src/biotite/sequence/align/matrix.py
+++ b/src/biotite/sequence/align/matrix.py
@@ -2,14 +2,17 @@
 # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
 # information.
 
+__all__ = ["SubstitutionMatrix"]
 __name__ = "biotite.sequence.align"
 __author__ = "Patrick Kunzmann"
 
-import os
+import functools
+from pathlib import Path
 import numpy as np
 from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
 
-__all__ = ["SubstitutionMatrix"]
+# Directory of matrix files
+_DB_DIR = Path(__file__).parent / "matrix_data"
 
 
 class SubstitutionMatrix(object):
@@ -59,6 +62,12 @@ class SubstitutionMatrix(object):
             - **RBLOSUM<n>_<BLOCKS>**
             - **CorBLOSUM<n>_<BLOCKS>**
 
+        - Structural alphabet substitution matrices
+
+            - **3Di** - For 3Di alphabet from ``foldseek`` :footcite:`VanKempen2024`
+            - **PB** - For Protein Blocks alphabet from *PBxplore* :footcite:`Barnoud2017`
+            - **CLESUM** - For CLePAPS alphabet :footcite:`Wang2008`
+
     A list of all available matrix names is returned by
     :meth:`list_db()`.
 
@@ -124,9 +133,6 @@ class SubstitutionMatrix(object):
     >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
     """
 
-    # Directory of matrix files
-    _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
-
     def __init__(self, alphabet1, alphabet2, score_matrix):
         self._alph1 = alphabet1
         self._alph2 = alphabet2
@@ -350,7 +356,7 @@ def dict_from_db(matrix_name):
         matrix_dict : dict
             A dictionary representing the substitution matrix.
         """
-        filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
+        filename = _DB_DIR / f"{matrix_name}.mat"
         with open(filename, "r") as f:
             return SubstitutionMatrix.dict_from_str(f.read())
 
@@ -364,11 +370,10 @@ def list_db():
         db_list : list
             List of matrix names in the internal database.
         """
-        files = os.listdir(SubstitutionMatrix._db_dir)
-        # Remove '.mat' from files
-        return [file[:-4] for file in sorted(files)]
+        return sorted(path.stem for path in _DB_DIR.glob("*.mat"))  # stable order
 
     @staticmethod
+    @functools.cache
     def std_protein_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for protein sequence
@@ -379,9 +384,12 @@ def std_protein_matrix():
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_blosum62
+        return SubstitutionMatrix(
+            ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
+        )
 
     @staticmethod
+    @functools.cache
     def std_nucleotide_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for DNA sequence
@@ -392,13 +400,107 @@ def std_nucleotide_matrix():
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_nuc
+        return SubstitutionMatrix(
+            NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
+        )
+
+    @staticmethod
+    @functools.cache
+    def std_3di_matrix():
+        """
+        Get the default :class:`SubstitutionMatrix` for 3Di sequence
+        alignments.
+        :footcite:`VanKempen2024`
+
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+        """
+        # Import inside function to avoid circular import
+        from biotite.structure.alphabet.i3d import I3DSequence
 
+        return SubstitutionMatrix(I3DSequence.alphabet, I3DSequence.alphabet, "3Di")
+
+    @staticmethod
+    @functools.cache
+    def std_protein_blocks_matrix(unknown_match=200, unkown_mismatch=-200):
+        """
+        Get the default :class:`SubstitutionMatrix` for Protein Blocks sequences.
 
-# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
-_matrix_blosum62 = SubstitutionMatrix(
-    ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
-)
-_matrix_nuc = SubstitutionMatrix(
-    NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
-)
+        The matrix is adapted from *PBxplore* :footcite:`Barnoud2017`.
+
+        Parameters
+        ----------
+        unknown_match, unkown_mismatch : int, optional
+            The match and mismatch score for undefined symbols.
+            The default values were chosen arbitrarily.
+
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+
+        References
+        ----------
+
+        .. footbibliography::
+
+        """
+        from biotite.structure.alphabet.pb import ProteinBlocksSequence
+
+        alphabet = ProteinBlocksSequence.alphabet
+        unknown_symbol = ProteinBlocksSequence.unknown_symbol
+        matrix_dict = SubstitutionMatrix.dict_from_db("PB")
+        for symbol in alphabet:
+            if symbol == unknown_symbol:
+                continue
+            matrix_dict[symbol, unknown_symbol] = unkown_mismatch
+            matrix_dict[unknown_symbol, symbol] = unkown_mismatch
+        matrix_dict[unknown_symbol, unknown_symbol] = unknown_match
+        return SubstitutionMatrix(
+            alphabet,
+            alphabet,
+            matrix_dict,
+        )
+
+    @staticmethod
+    @functools.cache
+    def std_clepaps_matrix(unknown_match=200, unkown_mismatch=-200):
+        """
+        Get the default :class:`SubstitutionMatrix` for *CLePAPS* sequences.
+
+        Parameters
+        ----------
+        unknown_match, unkown_mismatch : int, optional
+            The match and mismatch score for undefined symbols.
+            The default values were chosen arbitrarily.
+
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+
+        References
+        ----------
+
+        .. footbibliography::
+
+        """
+        from biotite.structure.alphabet.clepaps import ClepapsSequence
+
+        alphabet = ClepapsSequence.alphabet
+        unknown_symbol = ClepapsSequence.unknown_symbol
+        matrix_dict = SubstitutionMatrix.dict_from_db("CLESUM")
+        # Add match/mismatch scores for undefined symbols
+        for symbol in alphabet:
+            if symbol == unknown_symbol:
+                continue
+            matrix_dict[symbol, unknown_symbol] = unkown_mismatch
+            matrix_dict[unknown_symbol, symbol] = unkown_mismatch
+        matrix_dict[unknown_symbol, unknown_symbol] = unknown_match
+        return SubstitutionMatrix(
+            alphabet,
+            alphabet,
+            matrix_dict,
+        )
diff --git a/src/biotite/sequence/align/matrix_data/3Di.mat b/src/biotite/sequence/align/matrix_data/3Di.mat
new file mode 100644
index 000000000..93fe4e97b
--- /dev/null
+++ b/src/biotite/sequence/align/matrix_data/3Di.mat
@@ -0,0 +1,25 @@
+# 3Di bit/2
+# Background (precomputed optional): 0.0489372 0.0306991 0.101049 0.0329671 0.0276149 0.0416262 0.0452521 0.030876 0.0297251 0.0607036 0.0150238 0.0215826 0.0783843 0.0512926 0.0264886 0.0610702 0.0201311 0.215998 0.0310265 0.0295417 0.00001
+# Lambda     (precomputed optional): 0.351568
+    A   C   D   E   F   G   H   I   K   L   M   N   P   Q   R   S   T   V   W   Y   X
+A   6  -3   1   2   3  -2  -2  -7  -3  -3 -10  -5  -1   1  -4  -7  -5  -6   0  -2   0
+C  -3   6  -2  -8  -5  -4  -4 -12 -13   1 -14   0   0   1  -1   0  -8   1  -7  -9   0
+D   1  -2   4  -3   0   1   1  -3  -5  -4  -5  -2   1  -1  -1  -4  -2  -3  -2  -2   0
+E   2  -8  -3   9  -2  -7  -4 -12 -10  -7 -17  -8  -6  -3  -8 -10 -10 -13  -6  -3   0
+F   3  -5   0  -2   7  -3  -3  -5   1  -3  -9  -5  -2   2  -5  -8  -3  -7   4  -4   0
+G  -2  -4   1  -7  -3   6   3   0  -7  -7  -1  -2  -2  -4   3  -3   4  -6  -4  -2   0
+H  -2  -4   1  -4  -3   3   6  -4  -7  -6  -6   0  -1  -3   1  -3  -1  -5  -5   3   0
+I  -7 -12  -3 -12  -5   0  -4   8  -5 -11   7  -7  -6  -6  -3  -9   6 -12  -5  -8   0
+K  -3 -13  -5 -10   1  -7  -7  -5   9 -11  -8 -12  -6  -5  -9 -14  -5 -15   5  -8   0
+L  -3   1  -4  -7  -3  -7  -6 -11 -11   6 -16  -3  -2   2  -4  -4  -9   0  -8  -9   0
+M -10 -14  -5 -17  -9  -1  -6   7  -8 -16  10  -9  -9 -10  -5 -10   3 -16  -6  -9   0
+N  -5   0  -2  -8  -5  -2   0  -7 -12  -3  -9   7   0  -2   2   3  -4   0  -8  -5   0
+P  -1   0   1  -6  -2  -2  -1  -6  -6  -2  -9   0   4   0   0  -2  -4   0  -4  -5   0
+Q   1   1  -1  -3   2  -4  -3  -6  -5   2 -10  -2   0   5  -2  -4  -5  -1  -2  -5   0
+R  -4  -1  -1  -8  -5   3   1  -3  -9  -4  -5   2   0  -2   6   2   0  -1  -6  -3   0
+S  -7   0  -4 -10  -8  -3  -3  -9 -14  -4 -10   3  -2  -4   2   6  -6   0 -11  -9   0
+T  -5  -8  -2 -10  -3   4  -1   6  -5  -9   3  -4  -4  -5   0  -6   8  -9  -5  -5   0
+V  -6   1  -3 -13  -7  -6  -5 -12 -15   0 -16   0   0  -1  -1   0  -9   3 -10 -11   0
+W   0  -7  -2  -6   4  -4  -5  -5   5  -8  -6  -8  -4  -2  -6 -11  -5 -10   8  -6   0
+Y  -2  -9  -2  -3  -4  -2   3  -8  -8  -9  -9  -5  -5  -5  -3  -9  -5 -11  -6   9   0
+X   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
\ No newline at end of file
diff --git a/src/biotite/sequence/align/matrix_data/PB.license b/src/biotite/sequence/align/matrix_data/PB.license
new file mode 100644
index 000000000..688633bfa
--- /dev/null
+++ b/src/biotite/sequence/align/matrix_data/PB.license
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Poulain, A. G. de Brevern
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/src/biotite/sequence/align/matrix_data/PB.mat b/src/biotite/sequence/align/matrix_data/PB.mat
new file mode 100644
index 000000000..abb8dc293
--- /dev/null
+++ b/src/biotite/sequence/align/matrix_data/PB.mat
@@ -0,0 +1,18 @@
+# PB substitution matrix, adapted from PBxplore
+   a     b     c     d     e     f     g     h     i     j     k     l     m     n     o     p
+a  516   -59   113  -105  -411  -177   -27  -361    47  -103  -644  -259  -599  -372  -124   -83
+b  -59   541  -146  -210  -155  -310   -97    90   182  -128   -30    29  -745  -242  -165    22
+c  113  -146   360   -14  -333  -240    49  -438  -269  -282  -688  -682  -608  -455  -147     6
+d -105  -210   -14   221     5  -131  -349  -278  -253  -173  -585  -670 -1573 -1048  -691  -497
+e -411  -155  -333     5   520   185   186   138  -378   -70  -112  -514 -1136  -469  -617  -632
+f -177  -310  -240  -131   185   459   -99   -45  -445    83  -214   -88  -547  -629  -406  -552
+g  -27   -97    49  -349   186   -99   665   -99   -89  -118  -409  -138  -124   172   128   254
+h -361    90  -438  -278   138   -45   -99   632  -205   316   192  -108  -712  -359    95  -399
+i   47   182  -269  -253  -378  -445   -89  -205   696   186     8    15  -709  -269  -169   226
+j -103  -128  -282  -173   -70    83  -118   316   186   768   196     5  -398  -340  -117  -104
+k -644   -30  -688  -585  -112  -214  -409   192     8   196   568   -65  -270  -231  -471  -382
+l -259    29  -682  -670  -514   -88  -138  -108    15     5   -65   533  -131     8   -11  -316
+m -599  -745  -608 -1573 -1136  -547  -124  -712  -709  -398  -270  -131   241    -4  -190  -155
+n -372  -242  -455 -1048  -469  -629   172  -359  -269  -340  -231     8    -4   703    88   146
+o -124  -165  -147  -691  -617  -406   128    95  -169  -117  -471   -11  -190    88   716    58
+p  -83    22     6  -497  -632  -552   254  -399   226  -104  -382  -316  -155   146    58   609
\ No newline at end of file
diff --git a/src/biotite/structure/alphabet/__init__.py b/src/biotite/structure/alphabet/__init__.py
new file mode 100644
index 000000000..f517b9ed9
--- /dev/null
+++ b/src/biotite/structure/alphabet/__init__.py
@@ -0,0 +1,14 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+A subpackage for converting structures to structural alphabet sequences.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde, Patrick Kunzmann"
+
+from .clepaps import *
+from .i3d import *
+from .pb import *
diff --git a/src/biotite/structure/alphabet/clepaps.py b/src/biotite/structure/alphabet/clepaps.py
new file mode 100644
index 000000000..70bc01f2d
--- /dev/null
+++ b/src/biotite/structure/alphabet/clepaps.py
@@ -0,0 +1,156 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Conversion of structures into the *CLePAPS* structural alphabet.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Patrick Kunzmann"
+__all__ = ["ClepapsSequence", "to_clepaps"]
+
+import numpy as np
+from biotite.sequence.alphabet import LetterAlphabet
+from biotite.sequence.sequence import Sequence
+from biotite.structure.chains import get_chain_starts
+from biotite.structure.filter import filter_amino_acids
+from biotite.structure.geometry import angle, dihedral
+from biotite.structure.util import coord_for_atom_name_per_residue
+
+# CLePAPS reference angles
+CLEPAPS_CENTERS = np.array(
+    [
+       [ 1.02, -2.  ,  1.55],
+       [ 1.06, -2.94,  1.34],
+       [ 1.01, -1.88,  1.14],
+       [ 0.79, -2.3 ,  1.03],
+       [ 1.02, -2.98,  0.95],
+       [ 1.09, -2.72,  0.91],
+       [ 1.49,  2.09,  1.05],
+       [ 1.55,  0.88,  1.55],
+       [ 1.52,  0.83,  1.52],
+       [ 1.58,  1.05,  1.55],
+       [ 1.48,  0.7 ,  1.43],
+       [ 1.4 ,  0.75,  0.84],
+       [ 1.47,  1.64,  1.44],
+       [ 1.12,  0.14,  1.49],
+       [ 1.54, -1.89,  1.48],
+       [ 1.24, -2.98,  1.49],
+       [ 0.86, -0.37,  1.01],
+    ]
+)  # fmt: skip
+
+
+class ClepapsSequence(Sequence):
+    """
+    Representation of a structure in the *CLePAPS* structural alphabet.
+    :footcite:`Wang2008`
+
+    Parameters
+    ----------
+    sequence : iterable object, optional
+        The *CLePAPS* sequence.
+        This may either be a list or a string.
+        May take upper or lower case letters.
+        By default the sequence is empty.
+
+    See also
+    --------
+    to_clepaps : Create *CLePAPS* sequences from a structure.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    """
+
+    alphabet = LetterAlphabet("ABCDEFGHIJKLMNOPQR")
+    unknown_symbol = "R"
+
+    def get_alphabet(self):
+        return ClepapsSequence.alphabet
+
+
+def to_clepaps(atoms):
+    """
+    Encode each chain in the given structure to the *CLePAPS* structural
+    alphabet.
+    :footcite:`Wang2008`
+
+    Parameters
+    ----------
+    atoms : AtomArray
+        The atom array to encode.
+        May contain multiple chains.
+
+    Returns
+    -------
+    sequences : list of Sequence, length=n
+        The encoded *CLePAPS* sequence for each peptide chain in the structure.
+    chain_start_indices : ndarray, shape=(n,), dtype=int
+        The atom index where each chain starts.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    Examples
+    --------
+
+    >>> sequences, chain_starts = to_clepaps(atom_array)
+    >>> print(sequences[0])
+    """
+    sequences = []
+    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
+    for i in range(len(chain_start_indices) - 1):
+        start = chain_start_indices[i]
+        stop = chain_start_indices[i + 1]
+        chain = atoms[start:stop]
+        sequences.append(_to_clepaps(chain))
+    return sequences, chain_start_indices[:-1]
+
+
+def _to_clepaps(chain):
+    amino_acid_mask = filter_amino_acids(chain)
+
+    # CA coordinates for bending and dihedral angle calculation
+    (coord_ca,) = coord_for_atom_name_per_residue(
+        chain,
+        ("CA",),
+        amino_acid_mask,
+    )
+
+    bending = angle(coord_ca[:-2], coord_ca[1:-1], coord_ca[2:])
+    theta_1 = bending[:-1]
+    theta_2 = bending[1:]
+    tau = dihedral(coord_ca[:-3], coord_ca[1:-2], coord_ca[2:-1], coord_ca[3:])
+    clepaps_angles = np.stack([theta_1, tau, theta_2], axis=-1)
+
+    # Squared angle deviation (same argmin as RMSD) of all reference vs. actual angles
+    rmsda = np.sum(
+        (CLEPAPS_CENTERS[:, np.newaxis] - clepaps_angles[np.newaxis, :]) ** 2,
+        axis=-1,
+    )
+    # Where RMSDA is NaN (missing atoms/residues or chain ends), set symbol to unknown
+    clepaps_seq_code = np.full(
+        len(clepaps_angles),
+        ClepapsSequence.alphabet.encode(ClepapsSequence.unknown_symbol),
+    )
+    available_mask = ~np.isnan(rmsda).any(axis=0)
+    # Choose the symbol whose reference angles have the lowest RMSDA
+    # Due to the definition of Biotite symbol codes
+    # the index of the chosen reference is directly the symbol code
+    clepaps_seq_code[available_mask] = np.argmin(rmsda[:, available_mask], axis=0)
+    # Put the array of symbol codes into actual sequence objects
+    clepaps_sequence = ClepapsSequence()
+    # Since every symbol comprises 4 residues, the sequence length is shortened by 3
+    # By definition of CLePAPS, the first two and the last residue are undefined
+    clepaps_sequence.code = np.full(
+        coord_ca.shape[0],
+        ClepapsSequence.alphabet.encode(ClepapsSequence.unknown_symbol),
+    )
+    clepaps_sequence.code[2:-1] = clepaps_seq_code
+    return clepaps_sequence
diff --git a/src/biotite/structure/alphabet/encoder.py b/src/biotite/structure/alphabet/encoder.py
new file mode 100644
index 000000000..9793a59f4
--- /dev/null
+++ b/src/biotite/structure/alphabet/encoder.py
@@ -0,0 +1,332 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Implementation of the encoder neural network adapted from ``foldseek``.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde"
+__all__ = ["Encoder", "VirtualCenterEncoder", "PartnerIndexEncoder", "FeatureEncoder"]
+
+import abc
+from importlib.resources import files as resource_files
+import numpy
+import numpy.ma
+from biotite.structure.alphabet.layers import CentroidLayer, Model
+from biotite.structure.alphabet.unkerasify import load_kerasify
+
+
+class _BaseEncoder(abc.ABC):
+    @abc.abstractmethod
+    def encode(self, ca, cb, n, c):
+        """
+        Encode the given atom coordinates to a different representation.
+
+        Parameters
+        ----------
+        ca, cb, n, c : ndarray, shape=(n, 3), dtype=float
+            The coordinates of the ``CA``, ``CB``, ``N`` and ``C`` atoms for each
+            residue.
+            *NaN* if missing, e.g. ``CB`` for glycine.
+
+        Returns
+        -------
+        encoded : MaskedArray, shape=(n, m), dtype=float
+            The encoded representation.
+        """
+        raise NotImplementedError
+
+
+class VirtualCenterEncoder(_BaseEncoder):
+    r"""
+    An encoder for converting a protein structure to a virtual center.
+
+    For each residue, the coordinates of the virtual center are computed
+    from the coordinates of the ``CA``, ``CB`` and ``N`` atoms. The virtual center
+    :math:`V` is defined by the angle :math:`\theta = \angle V C_{\alpha} C_{\beta}`,
+    the dihedral angle :math:`\tau = \angle V C_{\alpha} C_{\beta} N` and the length
+    :math:`l = |V - C_{\alpha}|`. The default parameters used
+    in ``foldseek`` were selected after optimization on a validation set.
+
+    Parameters
+    ----------
+    distance_alpha_beta : float
+        The default distance between the ``CA`` and ``CB`` atoms to use when
+        reconstructing missing *Cβ* coordinates.
+    distance_alpha_v : float
+        The distance between the virtual center *V* and the ``CA`` atom, used to compute
+        the virtual center coordinates.
+    theta : float
+        The angle θ between the virtual center *V*, the ``CA`` and ``CB`` atoms, used to
+        compute the virtual center coordinates.
+    tau : float
+        The dihedral angle τ between the virtual center *V* and the ``CA``, ``CB``
+        and ``N`` atoms, used to compute the virtual center coordinates.
+    """
+
+    _DISTANCE_ALPHA_BETA = 1.5336
+
+    def __init__(
+        self,
+        *,
+        distance_alpha_beta=_DISTANCE_ALPHA_BETA,
+        distance_alpha_v=2.0,
+        theta=270.0,
+        tau=0.0,
+    ):
+        self.theta = theta
+        self.tau = tau
+        self.distance_alpha_v = distance_alpha_v
+        self.distance_alpha_beta = distance_alpha_beta
+
+    @property
+    def theta(self):
+        return numpy.rad2deg(self._theta)
+
+    @theta.setter
+    def theta(self, theta):
+        self._theta = numpy.deg2rad(theta)
+        self._cos_theta = numpy.cos(self._theta)
+        self._sin_theta = numpy.sin(self._theta)
+
+    @property
+    def tau(self):
+        return numpy.rad2deg(self._tau)
+
+    @tau.setter
+    def tau(self, tau):
+        self._tau = numpy.deg2rad(tau)
+        self._cos_tau = numpy.cos(self._tau)
+        self._sin_tau = numpy.sin(self._tau)
+
+    def _compute_virtual_center(self, ca, cb, n):
+        assert ca.shape == n.shape
+        assert ca.shape == cb.shape
+        v = cb - ca
+        a = cb - ca
+        b = n - ca
+        # normal angle
+        k = _normalize(numpy.cross(a, b, axis=-1), inplace=True)
+        v = (
+            v * self._cos_theta
+            + numpy.cross(k, v) * self._sin_theta
+            + k * (k * v).sum(axis=-1).reshape(-1, 1) * (1 - self._cos_theta)
+        )
+        # dihedral angle
+        k = _normalize(n - ca, inplace=True)
+        v = (
+            v * self._cos_tau
+            + numpy.cross(k, v) * self._sin_tau
+            + k * (k * v).sum(axis=-1).reshape(-1, 1) * (1 - self._cos_tau)
+        )
+        # apply final vector to Cα
+        v *= self.distance_alpha_v
+        v += ca
+        return v
+
+    def _approximate_cb_position(self, ca, n, c):
+        """
+        Approximate the position of ``CB`` from the backbone atoms.
+        """
+        assert ca.shape == n.shape
+        assert ca.shape == c.shape
+        v1 = _normalize(c - ca, inplace=True)
+        v2 = _normalize(n - ca, inplace=True)
+        v3 = v1 / 3.0
+
+        b1 = numpy.add(v2, v3, out=v2)
+        b2 = numpy.cross(v1, b1, axis=-1)
+        u1 = _normalize(b1, inplace=True)
+        u2 = _normalize(b2, inplace=True)
+
+        out = (numpy.sqrt(8) / 3.0) * ((-u1 / 2.0) - (u2 * numpy.sqrt(3) / 2.0)) - v3
+        out *= self.distance_alpha_beta
+        out += ca
+        return out
+
+    def _create_nan_mask(self, ca, n, c):
+        """
+        Mask every row where ``CA``, ``N`` or ``C`` contains at least one *NaN* value.
+        """
+        mask_ca = numpy.isnan(ca).max(axis=1)
+        mask_n = numpy.isnan(n).max(axis=1)
+        mask_c = numpy.isnan(c).max(axis=1)
+        return (mask_ca | mask_n | mask_c).repeat(3).reshape(-1, 3)
+
+    def encode(self, ca, cb, n, c):
+        ca = numpy.asarray(ca)
+        cb = numpy.asarray(cb)
+        n = numpy.asarray(n)
+        c = numpy.asarray(c)
+
+        assert ca.shape == cb.shape
+        assert ca.shape == c.shape
+        assert ca.shape == n.shape
+
+        # fix CB positions if needed
+        nan_indices = numpy.isnan(cb)
+        if numpy.any(nan_indices):
+            cb_approx = self._approximate_cb_position(ca, n, c)
+            # avoid writing to CB directly since it should be callee-save
+            cb_approx[~nan_indices] = cb[~nan_indices]
+            cb = cb_approx
+        # compute virtual center
+        vc = self._compute_virtual_center(ca, cb, n)
+        # mask residues without coordinates
+        return numpy.ma.masked_array(
+            vc,
+            mask=self._create_nan_mask(ca, n, c),
+            fill_value=numpy.nan,
+        )
+
+
+class PartnerIndexEncoder(_BaseEncoder):
+    """
+    An encoder for converting a protein structure to partner indices.
+
+    For each residue, the coordinates of the virtual center are computed from the
+    coordinates of the ``CA``, ``CB`` and ``N`` atoms.
+    A pairwise distance matrix is then created, and the index of the closest partner
+    residue is extracted for each position.
+    """
+
+    def __init__(self):
+        self.vc_encoder = VirtualCenterEncoder()
+
+    def _find_residue_partners(
+        self,
+        x,
+    ):
+        # compute pairwise squared distance matrix
+        r = numpy.sum(x * x, axis=-1).reshape(-1, 1)
+        r[0] = r[-1] = numpy.nan
+        D = r - 2 * numpy.ma.dot(x, x.T) + r.T
+        # avoid selecting residue itself as the best
+        D[numpy.diag_indices_from(D)] = numpy.inf
+        # get the closest non-masked residue
+        return numpy.nan_to_num(D, copy=False, nan=numpy.inf).argmin(axis=1)
+
+    def encode(self, ca, cb, n, c):
+        # encode backbone atoms to virtual center
+        vc = self.vc_encoder.encode(ca, cb, n, c)
+        # find closest neighbor for each residue
+        return self._find_residue_partners(vc)
+
+
+class FeatureEncoder(_BaseEncoder):
+    """
+    An encoder for converting a protein structure to structural descriptors.
+    """
+
+    def __init__(self):
+        self.partner_index_encoder = PartnerIndexEncoder()
+        self.vc_encoder = self.partner_index_encoder.vc_encoder
+
+    def _calc_conformation_descriptors(self, ca, partner_index, dtype=numpy.float32):
+        # build arrays of indices to use for vectorized angles
+        i = numpy.arange(1, ca.shape[-2] - 1)
+        j = partner_index[i]
+        # compute conformational descriptors
+        u1 = _normalize(ca[..., i, :] - ca[..., i - 1, :], inplace=True)
+        u2 = _normalize(ca[..., i + 1, :] - ca[..., i, :], inplace=True)
+        u3 = _normalize(ca[..., j, :] - ca[..., j - 1, :], inplace=True)
+        u4 = _normalize(ca[..., j + 1, :] - ca[..., j, :], inplace=True)
+        u5 = _normalize(ca[..., j, :] - ca[..., i, :], inplace=True)
+        desc = numpy.zeros((ca.shape[0], 10), dtype=dtype)
+        desc[i, 0] = numpy.sum(u1 * u2, axis=-1)
+        desc[i, 1] = numpy.sum(u3 * u4, axis=-1)
+        desc[i, 2] = numpy.sum(u1 * u5, axis=-1)
+        desc[i, 3] = numpy.sum(u3 * u5, axis=-1)
+        desc[i, 4] = numpy.sum(u1 * u4, axis=-1)
+        desc[i, 5] = numpy.sum(u2 * u3, axis=-1)
+        desc[i, 6] = numpy.sum(u1 * u3, axis=-1)
+        desc[i, 7] = numpy.linalg.norm(ca[i] - ca[j], axis=-1)
+        desc[i, 8] = numpy.clip(j - i, -4, 4)
+        desc[i, 9] = numpy.copysign(numpy.log(numpy.abs(j - i) + 1), j - i)
+        return desc
+
+    def _create_descriptor_mask(self, mask, partner_index):
+        i = numpy.arange(1, mask.shape[0] - 1)
+        j = partner_index[i]
+        out = numpy.zeros((mask.shape[0], 10), dtype=numpy.bool_)
+        out[1:-1, :] |= (
+            mask[i - 1] | mask[i] | mask[i + 1] | mask[j - 1] | mask[j] | mask[j + 1]
+        ).reshape(mask.shape[0] - 2, 1)
+        out[0] = out[-1] = True
+        return out
+
+    def encode(self, ca, cb, n, c):
+        # encode backbone atoms to virtual center
+        vc = self.vc_encoder.encode(ca, cb, n, c)
+        # find closest neighbor for each residue
+        partner_index = self.partner_index_encoder._find_residue_partners(vc)
+        # build position features from residue angles
+        descriptors = self._calc_conformation_descriptors(ca, partner_index)
+        # create mask
+        mask = self._create_descriptor_mask(vc.mask[:, 0], partner_index)
+        return numpy.ma.masked_array(
+            descriptors,
+            mask=mask,
+            fill_value=numpy.nan,
+        )
+
+
+class Encoder(_BaseEncoder):
+    """
+    An encoder for converting a protein structure to 3di states.
+    """
+
+    _INVALID_STATE = 2
+    _CENTROIDS = numpy.array(
+        [
+            [-1.0729, -0.3600],
+            [-0.1356, -1.8914],
+            [0.4948, -0.4205],
+            [-0.9874, 0.8128],
+            [-1.6621, -0.4259],
+            [2.1394, 0.0486],
+            [1.5558, -0.1503],
+            [2.9179, 1.1437],
+            [-2.8814, 0.9956],
+            [-1.1400, -2.0068],
+            [3.2025, 1.7356],
+            [1.7769, -1.3037],
+            [0.6901, -1.2554],
+            [-1.1061, -1.3397],
+            [2.1495, -0.8030],
+            [2.3060, -1.4988],
+            [2.5522, 0.6046],
+            [0.7786, -2.1660],
+            [-2.3030, 0.3813],
+            [1.0290, 0.8772],
+        ]
+    )
+
+    def __init__(self):
+        self.feature_encoder = FeatureEncoder()
+        layers = load_kerasify(
+            resource_files(__package__).joinpath("encoder_weights_3di.kerasify")
+        )
+        self.vae_encoder = Model(layers + (CentroidLayer(self._CENTROIDS),))
+
+    def encode(
+        self,
+        ca,
+        cb,
+        n,
+        c,
+    ):
+        descriptors = self.feature_encoder.encode(ca, cb, n, c)
+        states = self.vae_encoder(descriptors.data)
+        return numpy.ma.masked_array(
+            states,
+            mask=descriptors.mask[:, 0],
+            fill_value=self._INVALID_STATE,
+        )
+
+
+def _normalize(x, *, inplace=False):
+    norm = numpy.linalg.norm(x, axis=-1).reshape(*x.shape[:-1], 1)
+    return numpy.divide(x, norm, out=x if inplace else None, where=norm != 0)
diff --git a/src/biotite/structure/alphabet/encoder_weights_3di.kerasify b/src/biotite/structure/alphabet/encoder_weights_3di.kerasify
new file mode 100644
index 000000000..cfec8fbe4
Binary files /dev/null and b/src/biotite/structure/alphabet/encoder_weights_3di.kerasify differ
diff --git a/src/biotite/structure/alphabet/i3d.py b/src/biotite/structure/alphabet/i3d.py
new file mode 100644
index 000000000..1f295d1e1
--- /dev/null
+++ b/src/biotite/structure/alphabet/i3d.py
@@ -0,0 +1,131 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+NumPy port of the ``foldseek`` code for encoding structures to 3di.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde"
+__all__ = ["I3DSequence", "to_3di"]
+
+from biotite.sequence.alphabet import LetterAlphabet
+from biotite.sequence.sequence import Sequence
+from biotite.structure.alphabet.encoder import Encoder
+from biotite.structure.chains import get_chain_starts
+from biotite.structure.util import coord_for_atom_name_per_residue
+
+
+class I3DSequence(Sequence):
+    """
+    Representation of a structure in the 3Di structural alphabet.
+    :footcite:`VanKempen2024`
+
+    Parameters
+    ----------
+    sequence : iterable object, optional
+        The 3Di sequence.
+        This may either be a list or a string.
+        May take upper or lower case letters.
+        By default the sequence is empty.
+
+    See also
+    --------
+    to_3di : Create 3Di sequences from a structure.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    """
+
+    alphabet = LetterAlphabet(
+        [
+            "A",
+            "C",
+            "D",
+            "E",
+            "F",
+            "G",
+            "H",
+            "I",
+            "K",
+            "L",
+            "M",
+            "N",
+            "P",
+            "Q",
+            "R",
+            "S",
+            "T",
+            "V",
+            "W",
+            "Y",
+        ]
+    )
+    unknown_symbol = "D"
+
+    def __init__(self, sequence=""):
+        if isinstance(sequence, str):
+            sequence = sequence.upper()
+        else:
+            sequence = [symbol.upper() for symbol in sequence]
+        seq_code = I3DSequence.alphabet.encode_multiple(sequence)
+        super().__init__()
+        self.code = seq_code
+
+    def get_alphabet(self):
+        return I3DSequence.alphabet
+
+    def __repr__(self):
+        return f'I3DSequence("{"".join(self.symbols)}")'
+
+
+def to_3di(atoms):
+    """
+    Encode each chain in the given structure to the 3Di structural alphabet.
+    :footcite:`VanKempen2024`
+
+    Parameters
+    ----------
+    atoms : AtomArray
+        The atom array to encode.
+        May contain multiple chains.
+
+    Returns
+    -------
+    sequences : list of Sequence, length=n
+        The encoded 3Di sequence for each peptide chain in the structure.
+    chain_start_indices : ndarray, shape=(n,), dtype=int
+        The atom index where each chain starts.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    Examples
+    --------
+
+    >>> sequences, chain_starts = to_3di(atom_array)
+    >>> print(sequences[0])
+    DQQVVCVVCPNVVNVDHGDD
+    """
+    sequences = []
+    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
+    for i in range(len(chain_start_indices) - 1):
+        start = chain_start_indices[i]
+        stop = chain_start_indices[i + 1]
+        chain = atoms[start:stop]
+        sequence = I3DSequence()
+        sequence.code = (
+            Encoder()
+            .encode(
+                *coord_for_atom_name_per_residue(chain, ["CA", "CB", "N", "C"]),
+            )
+            .filled()
+        )
+        sequences.append(sequence)
+    return sequences, chain_start_indices[:-1]
diff --git a/src/biotite/structure/alphabet/layers.py b/src/biotite/structure/alphabet/layers.py
new file mode 100644
index 000000000..63279cd15
--- /dev/null
+++ b/src/biotite/structure/alphabet/layers.py
@@ -0,0 +1,86 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Implementation of the neural network layers used in ``foldseek``.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde"
+__all__ = ["Layer", "DenseLayer", "CentroidLayer", "Model"]
+
+import abc
+import functools
+import numpy
+
+
+class Layer(abc.ABC):
+    @abc.abstractmethod
+    def __call__(self, x):
+        raise NotImplementedError
+
+
+class DenseLayer(Layer):
+    def __init__(self, weights, biases=None, activation: bool = True):
+        self.activation = activation
+        self.weights = numpy.asarray(weights)
+        if biases is None:
+            self.biases = numpy.zeros(self.weights.shape[1])
+        else:
+            self.biases = numpy.asarray(biases)
+
+    def __call__(self, x):
+        x = numpy.asarray(x)
+        out = x @ self.weights
+        out += self.biases
+
+        if self.activation:
+            return _relu(out, out=out)
+        else:
+            return out
+
+
+class CentroidLayer(Layer):
+    def __init__(self, centroids) -> None:
+        self.centroids = numpy.asarray(centroids)
+        self.r2 = numpy.sum(self.centroids**2, axis=1).reshape(-1, 1).T
+
+    def __call__(self, x):
+        # compute pairwise squared distance matrix
+        r1 = numpy.sum(x**2, axis=1).reshape(-1, 1)
+        D = r1 - 2 * x @ self.centroids.T + self.r2
+        # find closest centroid
+        states = numpy.empty(D.shape[0], dtype=numpy.uint8)
+        D.argmin(axis=1, out=states)
+        return states
+
+
+class Model:
+    def __init__(self, layers=()):
+        self.layers = list(layers)
+
+    def __call__(self, x):
+        return functools.reduce(lambda x, f: f(x), self.layers, x)
+
+
+def _relu(
+    x,
+    out=None,
+    *,
+    where=True,
+    casting="same_kind",
+    order="K",
+    dtype=None,
+    subok=True,
+):
+    return numpy.maximum(
+        0.0,
+        x,
+        out=out,
+        where=where,
+        casting=casting,
+        order=order,
+        dtype=dtype,
+        subok=subok,
+    )
diff --git a/src/biotite/structure/alphabet/pb.license b/src/biotite/structure/alphabet/pb.license
new file mode 100644
index 000000000..688633bfa
--- /dev/null
+++ b/src/biotite/structure/alphabet/pb.license
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Poulain, A. G. de Brevern
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/src/biotite/structure/alphabet/pb.py b/src/biotite/structure/alphabet/pb.py
new file mode 100644
index 000000000..e2c527cca
--- /dev/null
+++ b/src/biotite/structure/alphabet/pb.py
@@ -0,0 +1,143 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Conversion of structures into the *Protein Blocks* structural alphabet.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Patrick Kunzmann"
+__all__ = ["ProteinBlocksSequence", "to_protein_blocks"]
+
+import numpy as np
+from biotite.sequence.alphabet import LetterAlphabet
+from biotite.sequence.sequence import Sequence
+from biotite.structure.chains import get_chain_starts
+from biotite.structure.geometry import dihedral_backbone
+
+# PB reference angles, adapted from PBxplore
+PB_ANGLES = np.array(
+    [
+        [41.14,    75.53,   13.92,  -99.80,  131.88,  -96.27, 122.08,  -99.68],
+        [108.24,  -90.12,  119.54,  -92.21,  -18.06, -128.93, 147.04,  -99.90],
+        [-11.61, -105.66,   94.81, -106.09,  133.56, -106.93, 135.97, -100.63],
+        [141.98, -112.79,  132.20, -114.79,  140.11, -111.05, 139.54, -103.16],
+        [133.25, -112.37,  137.64, -108.13,  133.00,  -87.30, 120.54,   77.40],
+        [116.40, -105.53,  129.32,  -96.68,  140.72,  -74.19, -26.65,  -94.51],
+        [0.40,    -81.83,    4.91, -100.59,   85.50,  -71.65, 130.78,   84.98],
+        [119.14, -102.58,  130.83,  -67.91,  121.55,   76.25,  -2.95,  -90.88],
+        [130.68,  -56.92,  119.26,   77.85,   10.42,  -99.43, 141.40,  -98.01],
+        [114.32, -121.47,  118.14,   82.88, -150.05,  -83.81,  23.35,  -85.82],
+        [117.16,  -95.41,  140.40,  -59.35,  -29.23,  -72.39, -25.08,  -76.16],
+        [139.20,  -55.96,  -32.70,  -68.51,  -26.09,  -74.44, -22.60,  -71.74],
+        [-39.62,  -64.73,  -39.52,  -65.54,  -38.88,  -66.89, -37.76,  -70.19],
+        [-35.34,  -65.03,  -38.12,  -66.34,  -29.51,  -89.10,  -2.91,   77.90],
+        [-45.29,  -67.44,  -27.72,  -87.27,    5.13,   77.49,  30.71,  -93.23],
+        [-27.09,  -86.14,    0.30,   59.85,   21.51,  -96.30, 132.67,  -92.91],
+    ]
+)  # fmt: skip
+
+
+class ProteinBlocksSequence(Sequence):
+    """
+    Representation of a structure in the *Protein Blocks* structural alphabet.
+    :footcite:`Brevern2000`
+
+    Parameters
+    ----------
+    sequence : iterable object, optional
+        The *Protein Blocks* sequence.
+        This may either be a list or a string.
+        May take upper or lower case letters.
+        By default the sequence is empty.
+
+    See also
+    --------
+    to_protein_blocks : Create *Protein Blocks* sequences from a structure.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    """
+
+    alphabet = LetterAlphabet("abcdefghijklmnopZ")
+    unknown_symbol = "Z"
+
+    def get_alphabet(self):
+        return ProteinBlocksSequence.alphabet
+
+
+def to_protein_blocks(atoms):
+    """
+    Encode each chain in the given structure to the *Protein Blocks* structural
+    alphabet.
+    :footcite:`Brevern2000`
+
+    Parameters
+    ----------
+    atoms : AtomArray
+        The atom array to encode.
+        May contain multiple chains.
+
+    Returns
+    -------
+    sequences : list of Sequence, length=n
+        The encoded *Protein Blocks* sequence for each peptide chain in the structure.
+    chain_start_indices : ndarray, shape=(n,), dtype=int
+        The atom index where each chain starts.
+
+    References
+    ----------
+
+    .. footbibliography::
+
+    Examples
+    --------
+
+    >>> sequences, chain_starts = to_protein_blocks(atom_array)
+    >>> print(sequences[0])
+    ZZmmmmmnopjmnopacdZZ
+    """
+    sequences = []
+    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
+    for i in range(len(chain_start_indices) - 1):
+        start = chain_start_indices[i]
+        stop = chain_start_indices[i + 1]
+        chain = atoms[start:stop]
+        sequences.append(_to_protein_blocks(chain))
+    return sequences, chain_start_indices[:-1]
+
+
+def _to_protein_blocks(chain):
+    phi, psi, _ = dihedral_backbone(chain)
+
+    pb_angles = np.full((len(phi), 8), np.nan)
+    pb_angles[2:-2, 0] = psi[:-4]
+    pb_angles[2:-2, 1] = phi[1:-3]
+    pb_angles[2:-2, 2] = psi[1:-3]
+    pb_angles[2:-2, 3] = phi[2:-2]
+    pb_angles[2:-2, 4] = psi[2:-2]
+    pb_angles[2:-2, 5] = phi[3:-1]
+    pb_angles[2:-2, 6] = psi[3:-1]
+    pb_angles[2:-2, 7] = phi[4:]
+    pb_angles = np.rad2deg(pb_angles)
+
+    # Angle RMSD of all reference angles with all actual angles
+    rmsda = np.sum(
+        ((PB_ANGLES[:, np.newaxis] - pb_angles[np.newaxis, :] + 180) % 360 - 180) ** 2,
+        axis=-1,
+    )
+    # Where RMSDA is NaN (missing atoms/residues or chain ends), set symbol to unknown
+    pb_seq_code = np.full(len(pb_angles), ProteinBlocksSequence.alphabet.encode("Z"))
+    pb_available_mask = ~np.isnan(rmsda).any(axis=0)
+    # Choose the PB where the RMSDA to the reference angles is lowest
+    # Due to the definition of Biotite symbol codes
+    # the index of the chosen PB is directly the symbol code
+    pb_seq_code[pb_available_mask] = np.argmin(rmsda[:, pb_available_mask], axis=0)
+    # Put the array of symbol codes into actual sequence objects
+    pb_sequence = ProteinBlocksSequence()
+    pb_sequence.code = pb_seq_code
+    return pb_sequence
diff --git a/src/biotite/structure/alphabet/unkerasify.py b/src/biotite/structure/alphabet/unkerasify.py
new file mode 100644
index 000000000..95e228af0
--- /dev/null
+++ b/src/biotite/structure/alphabet/unkerasify.py
@@ -0,0 +1,122 @@
+# This source code is part of the Biotite package and is distributed
+# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
+# information.
+
+"""
+Parser for extracting weights from Keras files.
+
+Adapted from `moof2k/kerasify <https://github.com/moof2k/kerasify>`_.
+"""
+
+__name__ = "biotite.structure.alphabet"
+__author__ = "Martin Larralde"
+__all__ = ["load_kerasify"]
+
+import enum
+import functools
+import itertools
+import struct
+import numpy as np
+from biotite.structure.alphabet.layers import DenseLayer, Layer
+
+
+class LayerType(enum.IntEnum):
+    DENSE = 1
+    CONVOLUTION2D = 2
+    FLATTEN = 3
+    ELU = 4
+    ACTIVATION = 5
+    MAXPOOLING2D = 6
+    LSTM = 7
+    EMBEDDING = 8
+
+
+class ActivationType(enum.IntEnum):
+    LINEAR = 1
+    RELU = 2
+    SOFTPLUS = 3
+    SIGMOID = 4
+    TANH = 5
+    HARD_SIGMOID = 6
+
+
+class KerasifyParser:
+    """An incomplete parser for model files serialized with `kerasify`.
+
+    Notes
+    -----
+    Only dense layers are supported, since the ``foldseek`` VQ-VAE model
+    is only using 3 dense layers.
+    """
+
+    def __init__(self, file) -> None:
+        self.file = file
+        self.buffer = bytearray(1024)
+        (self.n_layers,) = self._get("I")
+
+    def read(self):
+        if self.n_layers == 0:
+            return None
+
+        self.n_layers -= 1
+        layer_type = LayerType(self._get("I")[0])
+        if layer_type == LayerType.DENSE:
+            (w0,) = self._get("I")
+            (w1,) = self._get("I")
+            (b0,) = self._get("I")
+            weights = (
+                np.frombuffer(self._read(f"={w0*w1}f"), dtype="f4")
+                .reshape(w0, w1)
+                .copy()
+            )
+            biases = np.frombuffer(self._read(f"={b0}f"), dtype="f4").copy()
+            activation = ActivationType(self._get("I")[0])
+            if activation not in (ActivationType.LINEAR, ActivationType.RELU):
+                raise NotImplementedError(
+                    f"Unsupported activation type: {activation!r}"
+                )
+            return DenseLayer(weights, biases, activation == ActivationType.RELU)
+        else:
+            raise NotImplementedError(f"Unsupported layer type: {layer_type!r}")
+
+    def __iter__(self):
+        return self
+
+    def __next__(self) -> Layer:
+        layer = self.read()
+        if layer is None:
+            raise StopIteration
+        return layer
+
+    def _read(self, format: str) -> memoryview:
+        """Read exactly ``calcsize(format)`` bytes into the reusable buffer."""
+        n = struct.calcsize(format)
+        if len(self.buffer) < n:
+            self.buffer.extend(itertools.repeat(0, n - len(self.buffer)))
+        v = memoryview(self.buffer)[:n]  # zero-copy view, filled in place
+        if self.file.readinto(v) != n:  # type: ignore
+            raise EOFError("Unexpected end of kerasify file")
+        return v
+
+    def _get(self, format: str):
+        v = self._read(format)
+        return struct.unpack(format, v)
+
+
+@functools.cache
+def load_kerasify(file_path):
+    """
+    Load the model layers from a ``.kerasify`` file.
+
+    Parameters
+    ----------
+    file_path : str
+        The path to the ``.kerasify`` file.
+
+    Returns
+    -------
+    layers : tuple of Layer
+        The model layers.
+    """
+    with open(file_path, "rb") as file:
+        return tuple(KerasifyParser(file))
diff --git a/src/biotite/structure/geometry.py b/src/biotite/structure/geometry.py
index cc5c59f4e..8f64fbfb8 100644
--- a/src/biotite/structure/geometry.py
+++ b/src/biotite/structure/geometry.py
@@ -25,10 +25,12 @@
 import numpy as np
 from biotite.structure.atoms import AtomArray, AtomArrayStack, coord
 from biotite.structure.box import coord_to_fraction, fraction_to_coord, is_orthogonal
-from biotite.structure.chains import chain_iter
-from biotite.structure.error import BadStructureError
-from biotite.structure.filter import filter_peptide_backbone
-from biotite.structure.util import norm_vector, vector_dot
+from biotite.structure.filter import filter_amino_acids
+from biotite.structure.util import (
+    coord_for_atom_name_per_residue,
+    norm_vector,
+    vector_dot,
+)
 
 
 def displacement(atoms1, atoms2, box=None):
@@ -480,139 +482,84 @@ def index_dihedral(*args, **kwargs):
 
 def dihedral_backbone(atom_array):
     """
-    Measure the characteristic backbone dihedral angles of a protein
-    structure.
+    Measure the characteristic backbone dihedral angles of a chain.
 
     Parameters
     ----------
-    atom_array: AtomArray or AtomArrayStack
-        The protein structure. A complete backbone, without gaps,
-        is required here.
-        Chain transitions are allowed, the angles at the transition are
-        `NaN`.
-        The order of the backbone atoms for each residue must be
-        (N, CA, C).
+    atom_array : AtomArray or AtomArrayStack
+        The protein structure to measure the dihedral angles for.
+        For missing backbone atoms the corresponding angles are `NaN`.
 
     Returns
     -------
     phi, psi, omega : ndarray
-        An array containing the 3 backbone dihedral angles for every
-        CA. 'phi' is not defined at the N-terminus, 'psi' and 'omega'
-        are not defined at the C-terminus. In these places the arrays
-        have *NaN* values. If an :class:`AtomArrayStack` is given, the
-        output angles are 2-dimensional, the first dimension corresponds
-        to the model number.
-
-    Raises
-    ------
-    BadStructureError
-        If the amount of backbone atoms is not equal to amount of
-        residues times 3 (for N, CA and C).
-
-    See Also
-    --------
-    dihedral
-
-    Examples
-    --------
-
-    >>> phi, psi, omega = dihedral_backbone(atom_array)
-    >>> print(np.stack([np.rad2deg(phi), np.rad2deg(psi)]).T)
-    [[     nan  -56.145]
-     [ -43.980  -51.309]
-     [ -66.466  -30.898]
-     [ -65.219  -45.945]
-     [ -64.747  -30.346]
-     [ -73.136  -43.425]
-     [ -64.882  -43.255]
-     [ -59.509  -25.698]
-     [ -77.989   -8.823]
-     [ 110.784    8.079]
-     [  55.244 -124.371]
-     [ -57.983  -28.766]
-     [ -81.834   19.125]
-     [-124.057   13.401]
-     [  67.931   25.218]
-     [-143.952  131.297]
-     [ -70.100  160.068]
-     [ -69.484  145.669]
-     [ -77.264  124.223]
-     [ -78.100      nan]]
+        An array containing the 3 backbone dihedral angles for every CA atom.
+        `phi` is not defined at the N-terminus, `psi` and `omega` are not defined at the
+        C-terminus.
+        In these places the arrays have *NaN* values.
+        If an :class:`AtomArrayStack` is given, the output angles are 2-dimensional,
+        the first dimension corresponds to the model number.
     """
-    bb_filter = filter_peptide_backbone(atom_array)
-    backbone = atom_array[..., bb_filter]
-
-    if (
-        backbone.array_length() % 3 != 0
-        or (backbone.atom_name[0::3] != "N").any()
-        or (backbone.atom_name[1::3] != "CA").any()
-        or (backbone.atom_name[2::3] != "C").any()
-    ):
-        raise BadStructureError(
-            "The backbone is invalid, must be repeats of (N, CA, C), "
-            "maybe a backbone atom is missing"
-        )
-    phis = []
-    psis = []
-    omegas = []
-    for chain_bb in chain_iter(backbone):
-        phi, psi, omega = _dihedral_backbone(chain_bb)
-        phis.append(phi)
-        psis.append(psi)
-        omegas.append(omega)
-    return (
-        np.concatenate(phis, axis=-1),
-        np.concatenate(psis, axis=-1),
-        np.concatenate(omegas, axis=-1),
-    )
+    amino_acid_mask = filter_amino_acids(atom_array)
 
+    # Residue-wise coordinates of the backbone atoms (NaN where an atom is missing)
+    coord_n, coord_ca, coord_c = coord_for_atom_name_per_residue(
+        atom_array,
+        ("N", "CA", "C"),
+        amino_acid_mask,
+    )
+    n_residues = coord_n.shape[-2]
 
-def _dihedral_backbone(chain_bb):
-    bb_coord = chain_bb.coord
     # Coordinates for dihedral angle calculation
     # Dim 0: Model index (only for atom array stacks)
     # Dim 1: Angle index
     # Dim 2: X, Y, Z coordinates
     # Dim 3: Atoms involved in dihedral angle
-    if isinstance(chain_bb, AtomArray):
-        angle_coord_shape = (len(bb_coord) // 3, 3, 4)
-    elif isinstance(chain_bb, AtomArrayStack):
-        angle_coord_shape = (bb_coord.shape[0], bb_coord.shape[1] // 3, 3, 4)
-    phi_coord = np.full(angle_coord_shape, np.nan)
-    psi_coord = np.full(angle_coord_shape, np.nan)
-    omega_coord = np.full(angle_coord_shape, np.nan)
-
-    # Indices for coordinates of CA atoms
-    ca_i = np.arange(bb_coord.shape[-2] // 3) * 3 + 1
+    if isinstance(atom_array, AtomArray):
+        angle_coord_shape: tuple[int, ...] = (n_residues, 3, 4)
+    elif isinstance(atom_array, AtomArrayStack):
+        angle_coord_shape = (atom_array.stack_depth(), n_residues, 3, 4)
+    coord_for_phi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_psi = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+    coord_for_omg = np.full(angle_coord_shape, np.nan, dtype=np.float32)
+
     # fmt: off
-    phi_coord  [..., 1:,  :, 0] = bb_coord[..., ca_i[1: ]-2, :]
-    phi_coord  [..., 1:,  :, 1] = bb_coord[..., ca_i[1: ]-1, :]
-    phi_coord  [..., 1:,  :, 2] = bb_coord[..., ca_i[1: ],   :]
-    phi_coord  [..., 1:,  :, 3] = bb_coord[..., ca_i[1: ]+1, :]
-    psi_coord  [..., :-1, :, 0] = bb_coord[..., ca_i[:-1]-1, :]
-    psi_coord  [..., :-1, :, 1] = bb_coord[..., ca_i[:-1],   :]
-    psi_coord  [..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+1, :]
-    psi_coord  [..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 0] = bb_coord[..., ca_i[:-1],   :]
-    omega_coord[..., :-1, :, 1] = bb_coord[..., ca_i[:-1]+1, :]
-    omega_coord[..., :-1, :, 2] = bb_coord[..., ca_i[:-1]+2, :]
-    omega_coord[..., :-1, :, 3] = bb_coord[..., ca_i[:-1]+3, :]
+    coord_for_phi[..., 1:,   :, 0] =  coord_c[..., 0:-1, :]
+    coord_for_phi[..., 1:,   :, 1] =  coord_n[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 2] = coord_ca[..., 1:,   :]
+    coord_for_phi[..., 1:,   :, 3] =  coord_c[..., 1:,   :]
+
+    coord_for_psi[..., 0:-1, :, 0] =  coord_n[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 1] = coord_ca[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 2] =  coord_c[..., 0:-1, :]
+    coord_for_psi[..., 0:-1, :, 3] =  coord_n[..., 1:,   :]
+
+    coord_for_omg[..., 0:-1, :, 0] = coord_ca[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 1] =  coord_c[..., 0:-1, :]
+    coord_for_omg[..., 0:-1, :, 2] =  coord_n[..., 1:,   :]
+    coord_for_omg[..., 0:-1, :, 3] = coord_ca[..., 1:,   :]
     # fmt: on
 
     phi = dihedral(
-        phi_coord[..., 0], phi_coord[..., 1], phi_coord[..., 2], phi_coord[..., 3]
+        coord_for_phi[..., 0],
+        coord_for_phi[..., 1],
+        coord_for_phi[..., 2],
+        coord_for_phi[..., 3],
     )
     psi = dihedral(
-        psi_coord[..., 0], psi_coord[..., 1], psi_coord[..., 2], psi_coord[..., 3]
+        coord_for_psi[..., 0],
+        coord_for_psi[..., 1],
+        coord_for_psi[..., 2],
+        coord_for_psi[..., 3],
     )
-    omega = dihedral(
-        omega_coord[..., 0],
-        omega_coord[..., 1],
-        omega_coord[..., 2],
-        omega_coord[..., 3],
+    omg = dihedral(
+        coord_for_omg[..., 0],
+        coord_for_omg[..., 1],
+        coord_for_omg[..., 2],
+        coord_for_omg[..., 3],
     )
 
-    return phi, psi, omega
+    return phi, psi, omg
 
 
 def centroid(atoms):
diff --git a/src/biotite/structure/io/pdbx/convert.py b/src/biotite/structure/io/pdbx/convert.py
index 551155f5c..eef76924f 100644
--- a/src/biotite/structure/io/pdbx/convert.py
+++ b/src/biotite/structure/io/pdbx/convert.py
@@ -600,7 +600,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
 def _find_matches(query_arrays, reference_arrays):
     """
     For each index in the `query_arrays` find the indices in the
-    `reference_arrays` where all query values the reference counterpart.
+    `reference_arrays` where all query values match the reference counterpart.
     If no match is found for a query, the corresponding index is -1.
     """
     match_masks_for_all_columns = np.stack(
diff --git a/src/biotite/structure/segments.py b/src/biotite/structure/segments.py
index 5841346b3..f67c24d21 100644
--- a/src/biotite/structure/segments.py
+++ b/src/biotite/structure/segments.py
@@ -16,7 +16,7 @@
 import numpy as np
 
 
-def apply_segment_wise(starts, data, function, axis):
+def apply_segment_wise(starts, data, function, axis=None):
     """
     Generalized version of :func:`apply_residue_wise()` for
     residues and chains.
@@ -36,7 +36,6 @@ def apply_segment_wise(starts, data, function, axis):
             value = function(segment)
         else:
             value = function(segment, axis=axis)
-        value = function(segment, axis=axis)
         # Identify the shape of the resulting array by evaluation
         # of the function return value for the first segment
         if processed_data is None:
diff --git a/src/biotite/structure/util.py b/src/biotite/structure/util.py
index cabbdc8f5..018426061 100644
--- a/src/biotite/structure/util.py
+++ b/src/biotite/structure/util.py
@@ -8,9 +8,18 @@
 
 __name__ = "biotite.structure"
 __author__ = "Patrick Kunzmann"
-__all__ = ["vector_dot", "norm_vector", "distance", "matrix_rotate"]
+__all__ = [
+    "vector_dot",
+    "norm_vector",
+    "distance",
+    "matrix_rotate",
+    "coord_for_atom_name_per_residue",
+]
 
 import numpy as np
+from biotite.structure.atoms import AtomArray
+from biotite.structure.error import BadStructureError
+from biotite.structure.residues import get_residue_masks, get_residue_starts
 
 
 def vector_dot(v1, v2):
@@ -94,3 +103,59 @@ def matrix_rotate(v, matrix):
     if orig_ndim > 2:
         v = v.reshape(*orig_shape)
     return v
+
+
+def coord_for_atom_name_per_residue(atoms, atom_names, mask=None):
+    """
+    Get the coordinates of the specified atoms for every residue.
+
+    If a residue does not contain the specified atom, the coordinates are `NaN`.
+    If a residue contains multiple atoms with the specified name, an exception is
+    raised.
+
+    Parameters
+    ----------
+    atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
+        The atom array or stack to get the residue-wise coordinates from.
+    atom_names : list of str, length=k
+        The atom names to get the coordinates for.
+    mask : ndarray, shape=(n,), dtype=bool, optional
+        A boolean mask to further select valid atoms from `atoms`.
+
+    Returns
+    -------
+    coord : ndarray, shape=(k, m, r, 3) or shape=(k, r, 3)
+        The coordinates of the specified atoms for each residue.
+    """
+    residue_starts = get_residue_starts(atoms)
+    all_residue_masks = get_residue_masks(atoms, residue_starts)
+
+    if isinstance(atoms, AtomArray):
+        coord = np.full(
+            (len(atom_names), len(residue_starts), 3),
+            np.nan,
+            dtype=np.float32,
+        )
+    else:
+        coord = np.full(
+            (len(atom_names), atoms.stack_depth(), len(residue_starts), 3),
+            np.nan,
+            dtype=np.float32,
+        )
+
+    for i, atom_name in enumerate(atom_names):
+        specified_atom_mask = atoms.atom_name == atom_name
+        if mask is not None:
+            specified_atom_mask &= mask
+        all_residue_masks_for_specified_atom = all_residue_masks & specified_atom_mask
+        number_of_specified_atoms_per_residue = np.count_nonzero(
+            all_residue_masks_for_specified_atom, axis=-1
+        )
+        if np.any(number_of_specified_atoms_per_residue > 1):
+            raise BadStructureError(f"Multiple '{atom_name}' atoms per residue")
+        residues_with_specified_atom = number_of_specified_atoms_per_residue == 1
+        coord[i, ..., residues_with_specified_atom, :] = atoms.coord[
+            ..., specified_atom_mask, :
+        ]
+
+    return coord
diff --git a/tests/sequence/align/test_matrix.py b/tests/sequence/align/test_matrix.py
index 570878945..02a1cbae3 100644
--- a/tests/sequence/align/test_matrix.py
+++ b/tests/sequence/align/test_matrix.py
@@ -6,6 +6,7 @@
 import pytest
 import biotite.sequence as seq
 import biotite.sequence.align as align
+import biotite.structure.alphabet as strucalph
 
 
 @pytest.mark.parametrize(
@@ -13,7 +14,7 @@
     [
         entry
         for entry in align.SubstitutionMatrix.list_db()
-        if entry not in ["NUC", "GONNET"]
+        if entry not in ["NUC", "GONNET", "3Di", "PB"]
     ],
 )
 def test_matrices(db_entry):
@@ -25,6 +26,37 @@ def test_matrices(db_entry):
     align.SubstitutionMatrix(alph1, alph2, db_entry)
 
 
+@pytest.mark.parametrize(
+    "matrix_name, alphabet",
+    [
+        ("3Di", strucalph.I3DSequence.alphabet),
+    ],
+)
+def test_structural_alphabet_matrices(matrix_name, alphabet):
+    """
+    Test for exceptions when reading structural alphabet matrix files.
+    """
+    align.SubstitutionMatrix(alphabet, alphabet, matrix_name)
+
+
+@pytest.mark.parametrize(
+    "method_name",
+    [
+        "std_protein_matrix",
+        "std_nucleotide_matrix",
+        "std_3di_matrix",
+        "std_protein_blocks_matrix",
+        "std_clepaps_matrix",
+    ],
+)
+def test_default_matrices(method_name):
+    """
+    Test for exceptions when using the static methods for getting default matrices.
+    """
+    matrix = getattr(align.SubstitutionMatrix, method_name)()
+    assert isinstance(matrix, align.SubstitutionMatrix)
+
+
 def test_matrix_str():
     """
     Test conversion of substitution matrix to string via a small
diff --git a/tests/structure/data/alphabet/1ay7.bcif b/tests/structure/data/alphabet/1ay7.bcif
new file mode 100644
index 000000000..3ce454e2e
Binary files /dev/null and b/tests/structure/data/alphabet/1ay7.bcif differ
diff --git a/tests/structure/data/alphabet/1cew.bcif b/tests/structure/data/alphabet/1cew.bcif
new file mode 100644
index 000000000..f19f878e8
Binary files /dev/null and b/tests/structure/data/alphabet/1cew.bcif differ
diff --git a/tests/structure/data/alphabet/1mol.bcif b/tests/structure/data/alphabet/1mol.bcif
new file mode 100644
index 000000000..82787d42f
Binary files /dev/null and b/tests/structure/data/alphabet/1mol.bcif differ
diff --git a/tests/structure/data/alphabet/README.rst b/tests/structure/data/alphabet/README.rst
new file mode 100644
index 000000000..afb292bd2
--- /dev/null
+++ b/tests/structure/data/alphabet/README.rst
@@ -0,0 +1,31 @@
+Structural alphabet sequences
+==============================
+
+This directory contains structural alphabet sequences for the test structure files
+from the `tests/structure/data/` directory, generated with the respective reference
+implementation.
+
+3Di sequences
+-------------
+
+The 3Di sequences in `i3d.fasta` were generated with `foldseek` according to
+`these instructions <https://github.com/steineggerlab/foldseek/issues/314#issuecomment-2283329286>`_:
+
+.. code-block:: console
+
+    $ foldseek createdb --chain-name-mode 1 tests/structure/data/*.cif /tmp/biotite_3di
+    $ foldseek lndb /tmp/biotite_3di_h /tmp/biotite_3di_ss_h
+    $ foldseek convert2fasta /tmp/biotite_3di_ss tests/structure/data/alphabet/i3d.fasta
+
+Protein Blocks sequences
+------------------------
+
+Only one sequence is available in `pb.fasta`, which is taken from the
+`PBxplore documentation <https://pbxplore.readthedocs.io/en/latest/PBassign.html>`_.
+`1ay7.bcif` contains the corresponding structure.
+
+CLePAPS sequences
+-----------------
+
+The CLePAPS sequences in `clepaps.fasta` were taken from
+`presentation slides <https://slideplayer.com/slide/8109226/>`_.
diff --git a/tests/structure/data/alphabet/clepaps.fasta b/tests/structure/data/alphabet/clepaps.fasta
new file mode 100644
index 000000000..7edd7291c
--- /dev/null
+++ b/tests/structure/data/alphabet/clepaps.fasta
@@ -0,0 +1,4 @@
+>1mol_A
+RRFEDECCGAIHHHHHHHHHHHHHHHOMICQEECBLDFQNBFEEEEFEQNNGCPLDDEEEDEEENOGCEDEEEEEEPKKOGFEDPLDEQBGCCR
+>1cew_I
+RRCECECAJGBIHHHHHHHHIHHHIGGBLDFFCPLDPLEEFEDPOLCEEEEEEDEFDEAGCAKLAJGKHHIMNGKLQQQDEEEDEEEEEBPKKOGEEDPLEEER
diff --git a/tests/structure/data/alphabet/i3d.fasta b/tests/structure/data/alphabet/i3d.fasta
new file mode 100644
index 000000000..a931e9059
--- /dev/null
+++ b/tests/structure/data/alphabet/i3d.fasta
@@ -0,0 +1,216 @@
+>1aki_A THE STRUCTURE OF THE ORTHORHOMBIC FORM OF HEN EGG-WHITE LYSOZYME AT 1.5 ANGSTROMS RESOLUTION
+DADDLQRVLVLCVVLPPACQVHDHSLLVSQQLCQPPVRAQADWDADPQQWIQGGSNRHIQQEDAPQPRGDNGNNPVVYHSVVSSDPRCPSVVSRVSVVCVPPVHVVVRVSSVVPPPPDPSVVSCPPHDD
+>1crr_MODEL_21_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEAAEDFDQPQCVVLLVVCVAVNDRDPPPCLPDFDWDWHWDCLVHDRYTYTYGDHHPPDDDDDVNLVCLLPGFFYLQEYEQAVCVRVVVSVPVQVSSCVNVVHLDTQHAYEYEPPPDHPRNDDVVVVVVVVVVSVYHYWYYYSVPRPGSCVRVSSRVVSVVVD
+>1crr_MODEL_22_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEALEEWDAQLCLVQLLVCVPVVDGDNCPPQADFDWRKDWDAAPNRIYIYGYGNGGDHPDDDPVSLVVLVVHFQYAQEYAQLVCVRVVVSQVVQVSSCVNVVHLQTLYAYEYEPPPDDRGPHPVVVCVVRCVVSPHYYWYYYSNPRPGSCVRVRSRVVSVVVD
+>1crr_MODEL_23_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEALEAFDDPQCLVLLQVCVAVVDGDPDDALLDADWDWHWDQQDPGIYTYGYHDHGDHPPDDDVSLVVLQVGFFYELEAAQLPCVRVVVVQVVQPSSCVSVVHLDTLYAYEYENPPDDNGNNPPPPVCVRCVVSVHHYWYAYSNPRPGSCCRVSSRVVSVVPD
+>1crr_MODEL_24_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEAEEDWAQFLCLVLLLVCVPVVDGDGCPDQADFDWRWHWDAAPHDIYIYTYGDGHDHDDDDVVSLVCLQVHFFYQLRAAQQPVVRVVRVVVVQVSSCVSVVHLQTQYEDEHEPPPDDDGNHDPPVQCCVQVVSVHHYWYAYSPPRRGSVCRVNVRVVSVVPD
+>1crr_MODEL_25_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEAQEAWDLPQCRVLLLVCVAVNDRDPCPPLPDFDWRWDWDCLPPNIYIYGYGDGRDDDPDDPPSLVVLQVHQFYAQEAAQAPCVRVVVVVVVQPSSCVSVVHLDTQYEYEYEPPPDDRGRRDVVNVCVVVVVSPYHYWYYYSVPRRGSCCRVSVRCVSVVVD
+>1crr_MODEL_26_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEQLEAFDPPLCLPLLQCCQPVNDRDPPPDAQDADWHWDWDAQPNPIYIYIYGSGGDDDDPDDVSLVCLQPHFNYAHEAAFAPCVRVVVVQVVQVSSCVSVVHQQTLHAYEYENPPDDDGNDDPVRVCVVCVVSVHYYWYAYSVPRRGSCVRVNSRVVSVVPD
+>1crr_MODEL_27_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEEQEDWDQPLCLVQLVVCVPVPAGDPDPDQADADFDKDWDALDPRIYIYRYGSGGPHDDPPCVSLVCLQVHFFYAYEAEQLPVVRVVVCLPVVVCSCVSVVHLDTLYAHEYENPPDPDGNRDPPVVCCRQVVSPHHYFYAYSNVRHGSSVRVSSRCVSVVVD
+>1crr_MODEL_28_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEAEEAWAAPLCPVLLVVCVPVVDRDDPDQLQDADWDWDFDPLVDDTYIYTYGDHRHDCPPPDVSLLSLQVHQQYALEAAQAPVVRVVRVVVVQPSSCVRVVHRDGQHAYEHEPPPDDDGPHDPVVQCCVQVVSVHYYYYAYSPPRRGSSVRVSVRVVSVVPD
+>1crr_MODEL_29_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEALEEFAQPQCLVLLLCCVPVNDRDDRVQLQDFDKDWDWDQQPNPIYIYIYGNGRHDPDDDCVSLVCLQVHFNYALEAAQVDCVRVVVSVVVQVSSCVNVVHLDTQYEYEHENPPDDPGPRDPVNVCVPCVVSVYYYFYYYSVPRHGSCVRVNSSVVSVVVD
+>1crr_MODEL_30_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEALEAWDQPQCPVLLQVCVPVVAGDPCPPLVDFDWGWDWDPQVDDIYTYTYGDDHDDPPDPVVSLVVLQGGFNYALEAAQQDVVRVVRVVVVQVSSCVSVVHLDTQYEYEYEDPVDDDGPHDPVRQCCVQVVSVHHYFYAYSNHTPRSCVRVNSRVVVVVVD
+>1crr_MODEL_31_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEALEEFDLPLCQVLLLVCVAPNDRDPCDPQANWDWDWHWDPLPPDIYTYTYGYDGHDDDDDDVSLVVLQVGFQYEQEYAQLVCVRVVVCVPVVVSSCVNVVDLQTLYAYEHEDPPDDRGDHPVPVSCVVQVVSVHHYFYAYRNVGGRSSVRVSSRVVSVVPD
+>1crr_MODEL_32_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEQLEAWDQPQCLVQLQVCQPVNDGDPPPPLQDFDWGKDWDDQPPDIYIYTYGDDHDDPPPRVVSLVCLQVGFNYAYEAEQVPCVRVVVVVCVQPSSCVNVVHQDTQHAYEYENCPDDHGNDDPVNVCVVCVVSVHYYFYYYSVVRHRSSVRVNSSCVSVVVD
+>1crr_MODEL_33_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEQLEAFDQPQQPVLLLVCLVPNAGDPPPDLPDFDWDKDWDCLDNRIHIYTYGDGRQPPDDDDVSLVRLQGGFLYALRAAQAPVVRVVRVQCVQPSSCVNVVHLQTLYADEHEPPPDPDGPHPVVVVCVVQVVSPHYYYYYYRRPRRRSSVRVNSRVVSVVVD
+>1crr_MODEL_34_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEEQEAWDPQLCLVQLQVCVVPNAGDNPPPLQDFDFGWDQDAFPNHTYTYGYGSGHDHDPDDPPSLQVLQVGFFYAQEAEFAPRVRVVRCVVRQPSSCVNVVHLDTQHAYEHENPPDDHGNDPVVVSCVVVVVSPYHYYYYYSHPRRCSCVRVRSRVVSVVVD
+>1crr_MODEL_35_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEFEAAEAFDDPLQPPLLQVCQAPNARDPCPDLPDAFWGWHWDAQVNRIYTYTYGDGRDCPDDNVDNLVCLLGGFLYALRAEQQDVVRVVRVQCVQVSSCVSVVHNQGLYAYEHEPPPDPDGPHPVVNSVVRCVVSPHHYYYAYNNVGRRSCVSVNSSVVSVVVD
+>1crr_MODEL_36_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEALEDFDLPQCLVQLLVCVVVVAGDDDDPQQDFDWRWDWDDQPPHTYIYTYGNGGDNPDDDDVSLVVLQVHFNYALEAAQQDVVRVVVVVVVQVSSCVSVVHLDTQYAYEHENPPDDPGNHDPVVVCVVCVVSVHYYFYYYSNPRGRSSVRVNSRVVSVVVD
+>1crr_MODEL_37_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEALEAFDQPLCPVQLQVCVVVNDGDDDDQLQDADWGWDFDDQDPDTHIYTYGDGRDDPPPPDVSLVVLQVHQNYAHEAEFAPCVRVVVVLCVLVSSCVSVVPLQGLYAYEHEPPVDPDGNDDPVNVVVVCVVSPHDYFYAYSVHRPRSSVRVNVSVVSVVVD
+>1crr_MODEL_38_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEALEAFDAPLCPPLLQVCVPVNDRDPPPPQPDFDWGKDWDALPPHIYIYTYGDGGDDPDDCVVSLVVLQVHFFYAHEAAQLDVVRVVRVVVVQVSSCVSVVHLQTLYEHEYENPPDDNGNHDPPVQCVVCVVSVHHYFYYYSVVRHGSNCRVSVRVVSVVVD
+>1crr_MODEL_39_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DAEAEQLEAFDQPQCPVQLVVCLVPNAGDPDPPLPDFDWDKDFDQQVNRTYIYTYGDHRDDDPDDPPSLQVLQVGFQYAHEAAQVPVVRVVVVVVVLVVSCVNVVHLDGLYAYEHEPPPDDDGNRPVVVVCVVCVVSVHYYWYAYSNHRPGSCVRVSSRVVSVVVD
+>1crr_MODEL_40_A THE SOLUTION STRUCTURE AND DYNAMICS OF RAS P21. GDP DETERMINED BY HETERONUCLEAR THREE AND FOUR DIMENSIONAL NMR SPECTROSCOPY
+DEEAEALEEFDAPLCVVQLVVCQPVNDRDPDDDLVDFDWAWHWDAQPPHIYIYIYGDHRDDDDDDDVSLVCLQVGFQYALEAAQAPCVSVVVSVVVLVSSCVNVVHLQTLYAYEHEPCPDPHGNCPPVNCCVSAVVSPHHYYYYYSVVRPCSCVRVNSRVVSVVVD
+>1dix_A CRYSTAL STRUCTURE OF RNASE LE
+DDDPQLAWWKKWKWWFVLQVDQAPDHWAFAVVDRADRFIATQAIAGHHLVQHGAWQQPPPPFDDCVVVVVCVVVCQHHHGGRHPPHYNCVVRLRRRCRTGVVSLCVQQVDPNSRVVLSSVVCVVQRLQVLCVVLVHHLPWFWAFPVSSQVSSCVVPVEGKDFAWDAGHVGFTATTMIIWMGGSNSPDTDHDSDDHDHDRDTIHTRHGD
+>1f2n_A RICE YELLOW MOTTLE VIRUS
+DDDFKDKDWDKAFQWFQKFALPAQFWDKAFPAQLSGVVSLLVQLQFFKKAWPKKKKWWAFDDDPPDFWKKKKAKALFQPDDTDSADVSSVPHHQIFMDTLHDANVCVCCNPPHCCPRIGMRIDDRDPGDIATHANDAPVVDDRVVRSRHTRIMMIMGINDNDNHIDGGTTMMITIMMMGGHGDDSVPGD
+>1f2n_B RICE YELLOW MOTTLE VIRUS
+DDDQKAKDKDKAFQWFQKFAQPALWWDKAFPAQLSGVVSLLVQLQFFKKAWDKKKKWKAFDDDPPDFWKKWKAKALFLVDDTDSADVSRVPHHQIFMGGRRFANVQVVCNPPRDPDSHGMRIDPRVPQDMATHANDAPNVDDRVVRSSHGRIMMTMGINHRDNHIDGGITMMITIIMMGGHGDDSVVGD
+>1f2n_C RICE YELLOW MOTTLE VIRUS
+DDDDDDDDDPDDDDDDPADPDDDPGDQKDKDWAKAFQWFQKFAQPALFWDKAFPAQLSGVVSVVNQLQFFKKAWDKKKKWKAFPDDPVDAWKKWKAKDLFQPDDTDSADVSSVVGHQIFMGGLRFANCCVVCNPPHCCVGIGMRIDDRVPGDMATHANDAPNPDPSVVSSSHTRIMMIMGINHRDNHMDGGTTMMITIIMMGGHGDDSVVGD
+>1gya_MODEL_1_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDFDEDPAEDAAQFKFKADFDPDDDDDFFAKKFKAFVVPRHGAFMTGHPPDGDHPDLCWDADDSGMIIGHGDDPVSWGWMWIFTGGPVPDSPDIHTYGYYYDDD
+>1gya_MODEL_2_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEAEAEDAFPFKWKADDDPDFDDDFFWKKWKAFVPPRGTQFITADPPDGDHDDLCWYADNGQMIIHGGAHQVSWTWMKMWTGGNPPDTDDIGTYGGDYDDD
+>1gya_MODEL_3_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDQEDPAEDEAFDKDKADDDPDFDDDWFFKKWKAFVPPTGTQAITADPPDGPDPDLCWYADPRGMTIHHGDDVVSWTWMKIFTGTNVPHRPDIGTYGYDYDDD
+>1gya_MODEL_4_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDDDEDAFQFKDKDAFDPDFDDDFFAKWFKAFPPRRHGCFMGDDPPDGRDPDLCWYADPRQMIMRGGHHQVSWGKMKIFTGGPVPDRPGIHTYTYHYDDD
+>1gya_MODEL_5_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDFFEDEAEDAAFFKDKADDDPAFDDDFAWKWFKAFDPVRGTQFMTADPPDGHDPDQCWYADRRQMIIHGTDGQVSFGWMWIFGGGHVPDRPDIHTYGYHYDDD
+>1gya_MODEL_6_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDFFEDAAEDAAQFKGKFDDDPDADDQFFQKKFKAQVPPGHTQAIGDDPPDGDDPDLCWYADPRHMIIHHGHHPVSWTWMKMFTGGPVPDSPDIGTYTYDHDDD
+>1gya_MODEL_7_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDQEDPAEDAAFFKDKADDDPDFDDDFFFKKFKAFVVVRHGDFMTQPPPDTDDDDLCWYADRRRMIMGGTHGQVSWTWMKMFTGGNVPDRPDITTYTYGYDDD
+>1gya_MODEL_8_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDDAEDAAFAKDKADDDQDADDDFFFKKFKAFVPPGHTQAITDDPPDGHDPDLCWYADRGRMTIHHGDHVVSWGWMKIFTGGNVPDRPDIGTYGHYYDDD
+>1gya_MODEL_9_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDEAEDAAFFKGKAADDPDADDVWFFKKFKAQVVVRGGLFMTDDPPDGDAPDLLWHADNRNMTIHGGDDPVSWTWMKIFTGTPVPDRPDIGTYGYYHDDD
+>1gya_MODEL_10_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDAADPAEDAAFFKDKFDFDPDADDDWFQKWFKFFPPVTGGAFMGDHPPDGDHPDLCWYADPRQMTMHHGDHPVSFGWMWIFTGGPVPDRPDIHTYGYYYDDD
+>1gya_MODEL_11_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDDAEDAAQDKDKFDFDPDADDDFFFKKFKAQVPPGHTQFITDDPPDGDHPDLCWYADPRRMIMHGGDDQVSWGWMKIFTGTPVPDRPDIGTYTYYYDDD
+>1gya_MODEL_12_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDDAEDAFQGKDKAADDPDADDDFFFKKFKAQVVVRHTQAMGDDPPDGHAPDQCWHADRRQMITHGGDDPVSFTWMKIFTGGPVPDRPDIGTYTYDYDDD
+>1gya_MODEL_13_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDQEDEAEDAQPWKDKAAFDPDFDDDFAFKWFKFFDPVRGGQFMGDDPPDGDDDDLCWYADRRQMIIHGGQRPVSFGWMKIFGGGNVPDRPDIGTYGGYYDDD
+>1gya_MODEL_14_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDEWEDAFQFKTKAQFDPDDDDDFFFKKFKAFVPVRHGQFIGDPPPDTDDDDLCWYADPNNMIMRGTHHVVSFGWMWIFGGGNVPDSPDIGTYGGDYDDD
+>1gya_MODEL_15_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDEAEDAAFFWDKAAFDPDFDDPWFFKKFKAFDPVGGTQAIGDDPPDGDAPDQCWHADRGRMTIHGTGGQVSWGWMKMFTGTPVPDRPDIGTYGYHYDDD
+>1gya_MODEL_16_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFADDAEDAAQDKDKFDDDPPFDDDFFFKKFKAQVPVRHTDAIGDDPPDGDDPDQQWYADRRRMIMGGTHDPVSWTWMKMFTGGNVPDRPDIHTYGGDYDDD
+>1gya_MODEL_17_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDFFADEAEDAAQFKDKDDDDPDFDDDQWFWKFKAQVVVRHGQFIGDDPPDGDDPDLCWYADNGQMTMHGGGHQVNFTWMKIFTGGNVPDGPDIHTYTYDYDDD
+>1gya_MODEL_18_A N-GLYCAN AND POLYPEPTIDE NMR SOLUTION STRUCTURES OF THE ADHESION DOMAIN OF HUMAN CD2
+DDDDFEDPAEDAAQGKDKADDDPDADDDFFFKKWKAFVPPGGTQAITDPPDDGRAPDQCWHADRRRMIMGGGDDPVSFGWMKIFTGGNVPDRPDIGTYGYYYDDD
+>1igy_A STRUCTURE OF IMMUNOGLOBULIN
+DDKAKPAQEAEEAWQAKDKIKMFDPWFQFQAKWKWWDAPPDDIHTAAGGQWHGDPPHDPQWTWHDGTGMIMIIGRTADLVRFTKMWMWRDRDPPIDIHPIHGYQYDDDWDFWDKDWDWFDPVVLVVWKTKIKMKTAQTPDPPKDKAKDLADGHRDDQKDKDWDDQDSPSRGIIIMIMHMDTSCVSVVHFKIKTWMDDPVDPDIDIDIGTDDPD
+>1igy_B STRUCTURE OF IMMUNOGLOBULIN
+DDWAKDDAEEDEQFAKDKMKTFDDDDQQLQWKKWKWWAADVGDIGTAWIAGNPRPDIDGDDVQPPQKDWDADSVRGIIMIMHGGHHQVVFTWMWMAIDVPGPHIHDTDTYGHDPDDFWAWDKDWFAPQCDPCPPQKDKTKIKTWFGPDDDKDKDKPNVPDDDDKAWDDWDDDPRMTITMMMHIGGRPCPQPPWMWIWMAGVVVGDTDIHTRDHDDPDPPPPDDDDPPDKDKDKAFFALCQLQVQVHWTWIKMKIWAAAPAWPQQDKWKDFPNDIDDFPPQWPWFDCPPNHTMTMGGGTDHSVSLVVWTWIWIFTDTDPDPDRDTGIDTHDDDDWFFWPKDKADFDPVPDPDFKGKIKIKTWFTDDQNKDKFKAFLHDGDDDWDKDGWDQDDVRGTMIMIIDMDGNVSQVVFSKMKMKMADVVDDSRIDIDIDTD
+>1igy_C STRUCTURE OF IMMUNOGLOBULIN
+DAKAKPAQEAEEAWQAKDKIKMFGPWFQFQAKWKWWDAPPDDIHTAAGRQWHGDPPHDPQWTWHDGTGMIMIIGRTAALVRFTKMWMWRDRDPPIDIHPIHGYQYDDDWDFWDKDWDWFDPVVLVVWKTKIKMKTAQTPDPPKDKAKDLADGHRDDQKDKDWDDQDSPSRGIIIMIMGMDTNCVSVVHFKMKTWMDDDVDPDIDIDIGGNPPD
+>1igy_D STRUCTURE OF IMMUNOGLOBULIN
+DDWAKDDAEEDEQFAKDKMKTFDDDDQQLQWKKFKWWAADVGDIGTAWIAGRPRPDIDGDDVQPPQKDWDADSVRGIIMIMHGGHHQVVFTWMWMAIDVPGPHIHDTDTYGHDPDDFDAWDKDWFAPQCDDCPPQKDKTKIKTWFGPDDDKDKDKPNVPADPDKAWDDWDDDPRMIITMMMHIGGRPCPVPPWMWIWMAGVVVGDTDIHTRDHPDDDDDQPQDDDDQDKDKDKAFFALVQLQDQVHWTWTKMKIWAAAPAWDQQDKWKDFANDIDDFPPQWPWFDCPSNHTMTMGGGTDHSVSLVVWTWIWIFTDTDSDPDRDIGIDTHDDDDWFFWPKDKAFFDPVPDPDFKGKIKIKTWFTDDQSKDKFKAFLHDGDDDWDKDGWDQDDVGGTMIMIIDMDGPVSQVVFSKMKMKMADVVDDSRTDIDIDTD
+>1l2y_MODEL_1_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNVVNVDHGDD
+>1l2y_MODEL_2_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNPVNPDHGDD
+>1l2y_MODEL_3_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNVVVVDHGPD
+>1l2y_MODEL_4_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNVVNPDDGDD
+>1l2y_MODEL_5_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNPVNVDHGDD
+>1l2y_MODEL_6_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNPVNVDHGPD
+>1l2y_MODEL_7_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPAPVNVDHGPD
+>1l2y_MODEL_8_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCVNPVNPDDGPD
+>1l2y_MODEL_9_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNPVNVDDGDD
+>1l2y_MODEL_10_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNPVNPDDGDD
+>1l2y_MODEL_11_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVVPNCVVVDHGDD
+>1l2y_MODEL_12_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNPVNPDDGPD
+>1l2y_MODEL_13_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVHPPNPDDGDD
+>1l2y_MODEL_14_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCCVCPNVVNPDHGDD
+>1l2y_MODEL_15_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNPDDGDD
+>1l2y_MODEL_16_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQCVCVVCPNVVVVDHGDD
+>1l2y_MODEL_17_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DVQVVCVVCPAPVNVDHGDD
+>1l2y_MODEL_18_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPAVVNPDHGDD
+>1l2y_MODEL_19_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNVDHGDD
+>1l2y_MODEL_20_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNVVVVDHGPD
+>1l2y_MODEL_21_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCVNCVVVDHGDD
+>1l2y_MODEL_22_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNPDHGPD
+>1l2y_MODEL_23_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVVVNCVVVDHGPD
+>1l2y_MODEL_24_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNVVNVDDGDD
+>1l2y_MODEL_25_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DVQVVCVVCVNCVVVDHGDD
+>1l2y_MODEL_26_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPAVVNVDHGPD
+>1l2y_MODEL_27_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNVVNVDHGPD
+>1l2y_MODEL_28_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNPDHGDD
+>1l2y_MODEL_29_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNPVNVDDGDD
+>1l2y_MODEL_30_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPHPVNPDDGDD
+>1l2y_MODEL_31_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNVDHGDD
+>1l2y_MODEL_32_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCVNPVNPDDGDD
+>1l2y_MODEL_33_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNVVNPDHGPD
+>1l2y_MODEL_34_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNPPNPDDGDD
+>1l2y_MODEL_35_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPAVVNPDHGPD
+>1l2y_MODEL_36_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DQQVVCVVCPNVVVVDHGDD
+>1l2y_MODEL_37_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNVDHGPD
+>1l2y_MODEL_38_A NMR Structure of Trp-Cage Miniprotein Construct TC5b
+DCQVVCVVCPNVVNVDDGDD
+>1o1z_A Crystal structure of glycerophosphodiester phosphodiesterase (GDPD) (TM1621) from Thermotoga maritima at 1.60 A resolution
+DDDDAQEAAEQALVVVDPGVFLVRQLVLVVLPHLGYEWEWDAALVGFTWTDPDQADCPPQNDGGGRNRHHPVRVCVSVVNGIDGPVVSPVRDDLSGAYEYEYPDLSNVVVVVVVCPVGPRYEYEYCPVVSCQVRVPPHAYEYEDDPPSCPDVVSNLVVCVVRVHQEYEYELVLVVDVVSLVSLQVSVVVNRAYEYDDDDDVVSCVVCVNSHRYYHYSNSNVVVVVD
+>2axd_MODEL_1_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPPCCVVVPPVNVLLVVLLVLLLQQLCVVVVVPDGDPVVSQVVNPPVVSVSSVVSNVVSVVVSVVVPVPDPDDDDD
+>2axd_MODEL_2_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPDDCVNVPVVVVVLVVLLVLLLLLLCVVVVVDDGDNVVSPVVDDPVCVVSSVVSNVVSVVCSVVVHDPDDPPDDD
+>2axd_MODEL_3_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DVDVPCVDDVVVVLLVVLLVLLLLLLVVVVVVPPRPNVVSVVPRDPVPVVSNVVSNVVSVVVSVCVVPDDPPDDDD
+>2axd_MODEL_4_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPDDCVVDPPPVVLLVQLLVQLLLVLCVVVVVDDGPPVVLVVSQDPVPVVSSVVSNVVSVVCSVCVVVPDCPPPDD
+>2axd_MODEL_5_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DVDDDCVVPVVNCVLVVLLVLLLLLLVVVCPVPDGDNVVSLVVDDPVPSVSNVVSNVVNVVLVVVVVNPPPPDDPD
+>2axd_MODEL_6_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DDDDPVCDDVVVCLLVVLLVLLLLLLCVVVVVPPGPVCVSVVSQDPVPNVSNVVSNVVSVCVSVLVPGNDPNDDDD
+>2axd_MODEL_7_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DQDPVCCDDPVNCLLVVLLVVLLLVLCCVPVVDDRPNVCNVVSDDPVCPVSNVVSNVVSVVVSVCVVVPPDDDDDD
+>2axd_MODEL_8_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPPVVLVDDVCNVVLVVLLVLLLLVLCVVVPVPDGDPVVSVVVDDPVVVVSSVVSNVVSPVCSVCVPPPDPDDDDD
+>2axd_MODEL_9_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DDDDVVVPDCVPVLLVVLLVLLLQVLCVVVVVDDGDPVVLVVSDDPVCVVSSVVSNVVNVVCVVVPVPVDDDPPDD
+>2axd_MODEL_10_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DQCDPCSPPPVVVLLVVLLVLLLLLLCVVVVVPDRDNVVSLVVRDPVPNVSNVVSNVVSNVCSVVVCPDDDDDDDD
+>2axd_MODEL_11_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPPCCVVDPPVVVLLVVLLVLLLVVLVCVCVVPDDDPVVSLVVDDVVPNVSSVVSNVVSVVCSVVVVVDPDDDDDD
+>2axd_MODEL_12_S solution structure of the theta subunit of escherichia coli DNA polymerase III in complex with the epsilon subunit
+DPDPCVVPDCNVCLLVVLLVLLLLQLCVVVVVDDGDPPVSQVPRDPVPPVSSVVSNVVSPVVSVVVVNVDDDPDDD
+>2d0f_A Crystal Structure of Thermoactinomyces vulgaris R-47 Alpha-Amylase 1 (TVAI) Mutant D356N complexed with P2, a pullulan model oligosaccharide
+DALDFEFDQLFWDDDCFFVFWPCLAAAQQDKIKGKIKGFPHRADWKWKWKAWPVVRDIDIWTWAWDQAFVQNGMTIIMTIDGGGQTKMFIKMWTGRVNQIWIQALLGIGRDDDPALTAIDHHPFDAFVCLLQFAEEEDLQQFEFQQDQVLAQDFQNDDDPPATAHEDEAPDDLCDDPRHHSFRYGDQGALNGVQVCVLLVCPLQNGAEYEYQDQADDRTSRQLQAQDLPWGHSSRPTVVSLLVSLVSSCDPPSHPGHAYEYAQNQFWHFCCHLLNNPPCPDPAAHLLRDCPHPSVQQFAAPPGPPRGQADVPPPRTTGGAPPDPPDVSLQQCFDDCNHSLNVQCAPPHNHAEYEYDLLQQRYHNNGRHPDPRSLVSQLVRLCRNCVSPVRRAYEYEDQEQCLVQAVNSRGHLEYAPNQQAQQLLLQDQLCAGQQLHHHHDFLVVSVSSNSVSCSSHHQSHQSNHEHENHEQATFQSCVSNVNDVLLLQLSLLDLLQARHHRYYHPPSQLSDGAHGPPRRSHRDDVVSSHCVRVSSVSSSLSSVVSSVDPQSSRFDKAWFDRDRVQRKTWIWGDHPPKIKIKIFGSAQAKDKDWTQSVSVVDDAQDKWAFSVVRDMFGHHPSTTIGIAGHSHMTITMD
+>3o5r_A Complex of Fk506 with the Fk1 domain mutant A19T of FKBP51
+DVLVCQVVPFDQQDPVPPSQKTKDWDFAADDQDAADAFKKWWKWKWKDWPVGDTDDTPVVVVGTDIDGAPPPPAFQNVNRVSRRHHAFIKMKMWGFCNRHVAQVFDPPRGHGRITMIMIMHTHDIDHD
+>5eil_A Computational design of a high-affinity metalloprotein homotrimer containing a metal chelating non-canonical amino acid
+DDPLQVQLCCLLVQNPVSNVVSVVVPHDLCRADQQQDGSLLSNLQNLRLVSNVVSVVVPRQQCRQGNQQDGSLLNNLLVLNQNSNVVSLVSPHDQCRQTNQGDGSLLNNLLNLNQNNNVVSVVSPRDQCRATNVRDGSLRSNVVNVNVVSVVVSVVD
+>5eil_B Computational design of a high-affinity metalloprotein homotrimer containing a metal chelating non-canonical amino acid
+DDPLQVVLLVLLVQVPVVNVVSVVVPHDLCRADPQQDGSLLSNLQNLNLVSNVVSVVVPHQQCRQGNQQDGSLLNNLLNQNLNSNVVSVVSPHDQCRQGNQGDGSLLNNLLNLNQSVNVVSVVSPHAQCRATNVRDGSLNNNVVNVNVVNNVVSVPHD
+>5eil_C Computational design of a high-affinity metalloprotein homotrimer containing a metal chelating non-canonical amino acid
+DDPLQVQLLVLLVLPPVSNVVSVVVVHDLCRADPQQDGSLLSNLQNLNLVSNVVSVVVPRDQCGQGNQQDGSLLNNLLVQNQNSNVVSVVSPRDQCRQTNQGDGSLLNNLLNLNVVNNVVSVVSPHQQPRATNVRDGSLNSNVVNVNVVSNVVSVPVD
+>5h73_A Crystal structure of human DHODH with 18F
+DLLVCLQPPDLVVCVVVDFVVVSLVVVQVCLLVLVAPQDDDDQDVQQWDDAQNAIFSHQEEADEDSAAQLRRQVSRVSLPGREYEYHQAWAAADQFDDPPFKDDQVVQLKIFGQRGGHHNHLVRSLVSLVVCQVVQVVCRVVRYFYEYEYDYDQPDPDGLVRLLVCCQRCLLSGQEYEDEQAFQNRPPSVVCLAQVNVLVSVVSNVVSLVPDDVNSRHQYEYEEALPDDPVSLLSNLVSCVVRVRQAYEFHDFDQDQDPDRDGPCSPPGGGMAADVCQVSRLVVLLVNCVSNVLPHAYEGEGHDQALVSQLSSQLSAHPHYYYDSNCSNRNSVSSSRNSVRVVVVCVVVVHSHSVRNRNNVVD
+>5ugo_A DNA polymerase beta nick complex with imidodiphosphate
+DLPVLLLVQLQVVLLLCCQPVVNNVSNVLSNVLSVQVVPPPDNDDALVVSCVTDSRHDVSRVQRRVCNVPVDGPVVVVCCVDQLSVQRSVLCLQFPDGNVNSNVCVVVVQRDLVSCVVPLVPDDPLRNVSSVQVPLLPDKAFVVVVVVVVVVLQVQVCVVPVQKDKDWAEVVQLPDRIGSATEIEIEGQCFAPVHNNDPCSVVSSVVSCVVVQFFDFFSDDDRFKTWGWGFDDDDPPDDGRRIGIYMYGYAHVQQQLLQRVVRNADPVLVVVLQVLQVVLQWGGDSGAIFGADPVRDTDDGDDDDHNVVSCVSSVHGDDRSNVRPD
+>5zng_A The crystal complex of immune receptor RGA5A_S of Pia from rice (Oryzae sativa) with rice blast (Magnaporthe oryzae) effector protein AVR1-CO39
+DVQAFFKKKWKKFAPQQDPVSLVVLQVLLCPFPFWDHWDQDDPNSGMITTIGGPGDVVVSQVRRCVVRVDMDTDDMDGD
+>5zng_C The crystal complex of immune receptor RGA5A_S of Pia from rice (Oryzae sativa) with rice blast (Magnaporthe oryzae) effector protein AVR1-CO39
+DPAFKKKFKDDPHDTPDIDGDHAQDWDDDPNDIWGQHNQRAIPPQQDPVRIGMGMHTDPVHD
+>7gsa_A PanDDA Analysis group deposition -- Crystal structure of PTP1B in complex with FMOPL000260a
+DQVVVVLVVCVVVVCLVVVVVVLVVPADDADQVLCPDPVNPQQAPDPPFAFHPVFAQFQPDPPRRDHRWGWQQDVVLRWIAIFHAAGDPVCLLVVVSSCVVLQFQEEEEEEDCDAPRDGLHDPSADPDQPDWDADPVQQKIKGWDDWDDDDQWIKTWMWIARNVVRDIDIHIYIYGYPAHSDFGDPACVVVLVSLVVCVVVCRGPPVGGHHYYYYRSRAAVVLLSVLLVSLLSVLVPDPHNSPRDSSVSSSSSSVITPCHHPDSRSVSSSSNSNVVVCPVVPPD
diff --git a/tests/structure/data/alphabet/pb.fasta b/tests/structure/data/alphabet/pb.fasta
new file mode 100644
index 000000000..3d7cb3f8c
--- /dev/null
+++ b/tests/structure/data/alphabet/pb.fasta
@@ -0,0 +1,2 @@
+>1ay7
+ZZdddfklpcbfklmmmmmmmmnopafklgoiaklmmmmmmmmpacddddddehkllmmmmnnommmmmmmmmmmmmmnopacddddZZ
\ No newline at end of file
diff --git a/tests/structure/data/ids.txt b/tests/structure/data/ids.txt
index ec9fcc5ab..e905de2f8 100644
--- a/tests/structure/data/ids.txt
+++ b/tests/structure/data/ids.txt
@@ -16,4 +16,4 @@
 5eil
 4p5j
 1crr
-7gsa
+7gsa
\ No newline at end of file
diff --git a/tests/structure/test_clepaps.py b/tests/structure/test_clepaps.py
new file mode 100644
index 000000000..a9d74cb7a
--- /dev/null
+++ b/tests/structure/test_clepaps.py
@@ -0,0 +1,82 @@
+from pathlib import Path
+import numpy as np
+import pytest
+import biotite.sequence.io.fasta as fasta
+import biotite.structure as struc
+import biotite.structure.alphabet as strucalph
+import biotite.structure.io.pdbx as pdbx
+from tests.util import data_dir
+
+
+def _get_ref_3di_sequence(pdb_id, chain_id):
+    """
+    Look up the reference CLePAPS sequence of the given chain in `clepaps.fasta`.
+    The FASTA header is the lower-case PDB ID and the chain ID joined by ``_``.
+    """
+    fasta_path = Path(data_dir("structure")) / "alphabet" / "clepaps.fasta"
+    clepaps_file = fasta.FastaFile.read(fasta_path)
+    header = f"{pdb_id.lower()}_{chain_id}"
+    return strucalph.ClepapsSequence(clepaps_file[header])
+
+
+@pytest.mark.parametrize("pdb_id, chain_id", [("1mol", "A"), ("1cew", "I")])
+def test_to_clepaps(pdb_id, chain_id):
+    """
+    Convert a single chain to a CLePAPS sequence and compare the result with the
+    reference example taken from presentation slides.
+    """
+    bcif_path = Path(data_dir("structure")) / "alphabet" / f"{pdb_id}.bcif"
+    structure = pdbx.get_structure(pdbx.BinaryCIFFile.read(bcif_path), model=1)
+    # Restrict the structure to the amino acid residues of the requested chain
+    peptide = structure[struc.filter_amino_acids(structure)]
+    single_chain = peptide[peptide.chain_id == chain_id]
+    clepaps_sequences, _ = strucalph.to_clepaps(single_chain)
+
+    expected_sequence = _get_ref_3di_sequence(pdb_id, chain_id)
+
+    # The input comprises exactly one chain
+    # -> exactly one sequence is expected in return
+    assert len(clepaps_sequences) == 1
+    assert str(clepaps_sequences[0]) == str(expected_sequence)
+
+
+@pytest.mark.parametrize("pdb_id, chain_id", [("1mol", "A"), ("1cew", "I")])
+def test_missing_residues(pdb_id, chain_id):
+    """
+    Like `test_to_clepaps()`, but the CA atoms of some residues are missing.
+    Expect that these and adjacent residues get the unknown symbol 'R' in the
+    CLePAPS sequence.
+    """
+    N_DELETIONS = 5
+    # The 'R' symbol
+    UNKNOWN_SYMBOL = strucalph.ClepapsSequence.unknown_symbol
+
+    pdbx_file = pdbx.BinaryCIFFile.read(
+        Path(data_dir("structure")) / "alphabet" / f"{pdb_id}.bcif"
+    )
+    atoms = pdbx.get_structure(pdbx_file, model=1)
+    atoms = atoms[struc.filter_amino_acids(atoms)]
+    chain = atoms[atoms.chain_id == chain_id]
+
+    # Randomly delete the CA atoms of some residues (fixed seed -> reproducible)
+    rng = np.random.default_rng(1)
+    del_backbone_residue_ids = rng.choice(
+        np.unique(chain.res_id), N_DELETIONS, replace=False
+    )
+    chain = chain[
+        ~np.isin(chain.res_id, del_backbone_residue_ids) | ~(chain.atom_name == "CA")
+    ]
+
+    test_sequences, _ = strucalph.to_clepaps(chain)
+
+    # Apply the same deletions to the reference sequence
+    ref_sequence = _get_ref_3di_sequence(pdb_id, chain_id)
+    for res_id in del_backbone_residue_ids:
+        seq_index = res_id - chain.res_id[0]
+        # Convert the symbol for residue and adjacent ones to 'R'
+        start_index = max(0, seq_index - 2)
+        end_index = min(len(ref_sequence), seq_index + 1)
+        ref_sequence[start_index : end_index + 1] = UNKNOWN_SYMBOL
+
+    assert len(test_sequences) == 1
+    assert str(test_sequences[0]) == str(ref_sequence)
diff --git a/tests/structure/test_i3d.py b/tests/structure/test_i3d.py
new file mode 100644
index 000000000..402554608
--- /dev/null
+++ b/tests/structure/test_i3d.py
@@ -0,0 +1,112 @@
+import re
+from pathlib import Path
+import numpy as np
+import pytest
+import biotite.sequence.io.fasta as fasta
+import biotite.structure as struc
+import biotite.structure.alphabet as strucalph
+import biotite.structure.io.pdbx as pdbx
+from tests.util import data_dir
+
+
+def _get_ref_3di_sequence(pdb_id, chain_id):
+    """
+    Get the reference 3Di sequence of the given chain from `i3d.fasta`, using the
+    first matching entry (i.e. the first model); raise `ValueError` if none matches.
+    """
+    ref_3di_file = fasta.FastaFile.read(
+        Path(data_dir("structure")) / "alphabet" / "i3d.fasta"
+    )
+    for header, seq_string in ref_3di_file.items():
+        # The first model of a structure is also the first sequence to appear
+        # and thus to be matched
+        if re.match(rf"^{pdb_id}(_MODEL_\d+)?_{chain_id}", header):
+            ref_3di_sequence = strucalph.I3DSequence(seq_string)
+            break
+    else:
+        raise ValueError(
+            f"Reference 3Di sequence not found for {pdb_id} chain {chain_id}"
+        )
+    return ref_3di_sequence
+
+
+@pytest.mark.parametrize(
+    "path", Path(data_dir("structure")).glob("*.bcif"), ids=lambda path: path.stem
+)
+def test_to_3di(path):
+    """
+    Check that the 3Di sequence of each chain matches the *foldseek* reference;
+    structures with altlocs or without peptide chains (and `1dix`) are skipped.
+    """
+    if (
+        path.stem
+        in [
+            "1dix"  # `get_chain_starts()` does not work properly here with `use_author_fields=True`
+        ]
+    ):
+        pytest.skip("Miscellaneous issues")
+
+    pdbx_file = pdbx.BinaryCIFFile.read(path)
+    if np.any(
+        pdbx_file.block["atom_site"]["label_alt_id"].mask.array
+        == pdbx.MaskValue.PRESENT
+    ):
+        # There is some inconsistency in how foldseek and Biotite handle altloc IDs
+        # -> skip these cases for the sake of simplicity
+        pytest.skip("Structure contains altlocs")
+    atoms = pdbx.get_structure(pdbx_file, model=1)
+    atoms = atoms[struc.filter_amino_acids(atoms)]
+    if len(atoms) == 0:
+        pytest.skip("Structure contains no peptide chains")
+    test_3di, chain_starts = strucalph.to_3di(atoms)
+
+    ref_3di = [
+        _get_ref_3di_sequence(path.stem, chain_id)
+        for chain_id in atoms.chain_id[chain_starts]
+    ]
+
+    for test, ref, chain_id in zip(test_3di, ref_3di, atoms.chain_id[chain_starts]):
+        assert str(test) == str(ref), f"3Di sequence of chain {chain_id} does not match"
+
+
+def test_missing_residues():
+    """
+    Like `test_to_3di()`, but in some residues backbone atoms are missing.
+    Expect that these and adjacent residues get the unknown symbol in the
+    3Di sequence, while a few mismatches at other positions are tolerated.
+    """
+    PDB_ID = "1aki"
+    N_DELETIONS = 5
+    MAX_MISMATCH_PERCENTAGE = 0.1
+    UKNOWN_SYMBOL = strucalph.I3DSequence.unknown_symbol
+
+    pdbx_file = pdbx.BinaryCIFFile.read(Path(data_dir("structure")) / f"{PDB_ID}.bcif")
+    atoms = pdbx.get_structure(pdbx_file, model=1)
+    atoms = atoms[struc.filter_amino_acids(atoms)]
+
+    # Randomly delete some backbone atoms
+    rng = np.random.default_rng(1)
+    del_backbone_residue_ids = rng.choice(
+        np.unique(atoms.res_id), N_DELETIONS, replace=False
+    )
+    atoms = atoms[
+        ~np.isin(atoms.res_id, del_backbone_residue_ids)
+        | ~np.isin(atoms.atom_name, ("N", "CA", "CB", "C"))
+    ]
+    test_sequences, _ = strucalph.to_3di(atoms)
+
+    # Apply the same deletions to the reference sequence
+    ref_sequence = _get_ref_3di_sequence(PDB_ID, atoms.chain_id[0])
+    for res_id in del_backbone_residue_ids:
+        seq_index = res_id - atoms.res_id[0]
+        # Convert the 3Di symbol for residue and adjacent ones to the unknown symbol
+        start_index = max(0, seq_index - 1)
+        end_index = min(len(ref_sequence), seq_index + 1)
+        ref_sequence[start_index : end_index + 1] = UKNOWN_SYMBOL
+
+    assert len(test_sequences) == 1
+    # 3Di sequences are quite complex, i.e. removing backbone atoms at some position
+    # might alter the symbols in remote positions
+    # -> Allow for mismatches
+    n_mismatches = np.count_nonzero(test_sequences[0].code != ref_sequence.code)
+    assert n_mismatches / len(ref_sequence) <= MAX_MISMATCH_PERCENTAGE
diff --git a/tests/structure/test_pb.py b/tests/structure/test_pb.py
new file mode 100644
index 000000000..5ecb57348
--- /dev/null
+++ b/tests/structure/test_pb.py
@@ -0,0 +1,76 @@
+from pathlib import Path
+import numpy as np
+import pytest
+import biotite.sequence.io.fasta as fasta
+import biotite.structure as struc
+import biotite.structure.alphabet as strucalph
+import biotite.structure.io.pdbx as pdbx
+from tests.util import data_dir
+
+
+@pytest.fixture
+def reference_sequence():
+    """
+    Load the reference Protein Blocks sequence of the alphabet example structure.
+    """
+    fasta_path = Path(data_dir("structure")) / "alphabet" / "pb.fasta"
+    # Only the first entry of the FASTA file is used; its header is ignored
+    _, pb_string = next(fasta.FastaFile.read_iter(fasta_path))
+    return strucalph.ProteinBlocksSequence(pb_string)
+
+
+@pytest.fixture
+def reference_chain():
+    """
+    Load the amino acid atoms of chain ``B`` from the first model of ``1ay7``.
+    """
+    bcif_path = Path(data_dir("structure")) / "alphabet" / "1ay7.bcif"
+    model = pdbx.get_structure(pdbx.BinaryCIFFile.read(bcif_path), model=1)
+    amino_acids = model[struc.filter_amino_acids(model)]
+    return amino_acids[amino_acids.chain_id == "B"]
+
+
+def test_to_protein_blocks(reference_chain, reference_sequence):
+    """
+    Check the conversion of a structure into a Protein Blocks sequence against a
+    reference example taken from the PBxplore documentation
+    (https://pbxplore.readthedocs.io/en/latest/intro_PB.html).
+    """
+    pb_sequences, _ = strucalph.to_protein_blocks(reference_chain)
+
+    # Exactly one sequence is expected and it must equal the reference
+    assert [str(seq) for seq in pb_sequences] == [str(reference_sequence)]
+
+
+def test_missing_residues(reference_chain, reference_sequence):
+    """
+    Like `test_to_protein_blocks()`, but in some residues backbone atoms are missing.
+    Expect that these and nearby residues get the unknown symbol 'Z' in the
+    PB sequence.
+    """
+    N_DELETIONS = 5
+    # The 'Z' symbol
+    UNKNOWN_SYMBOL = strucalph.ProteinBlocksSequence.unknown_symbol
+
+    # Randomly (but reproducibly, due to the fixed seed) delete some backbone atoms
+    rng = np.random.default_rng(1)
+    del_backbone_residue_ids = rng.choice(
+        np.unique(reference_chain.res_id), N_DELETIONS, replace=False
+    )
+    reference_chain = reference_chain[
+        ~np.isin(reference_chain.res_id, del_backbone_residue_ids)
+        | ~np.isin(reference_chain.atom_name, ("N", "CA", "C"))
+    ]
+
+    # Apply the same deletions to the reference sequence
+    for res_id in del_backbone_residue_ids:
+        seq_index = res_id - reference_chain.res_id[0]
+        # Convert the PB symbols of the residue and up to two neighbors per side to 'Z'
+        start_index = max(0, seq_index - 2)
+        end_index = min(len(reference_sequence), seq_index + 3)
+        reference_sequence[start_index:end_index] = UNKNOWN_SYMBOL
+
+    test_pb_sequences, _ = strucalph.to_protein_blocks(reference_chain)
+
+    assert len(test_pb_sequences) == 1
+    assert str(test_pb_sequences[0]) == str(reference_sequence)
diff --git a/tests/test_doctest.py b/tests/test_doctest.py
index 8293210b6..98875124e 100644
--- a/tests/test_doctest.py
+++ b/tests/test_doctest.py
@@ -68,6 +68,7 @@
         "biotite.structure.io.mol", ["biotite.structure", "biotite.structure.info"]
     ),
     pytest.param("biotite.structure.info", ["biotite.structure"]),
+    pytest.param("biotite.structure.alphabet", ["biotite.structure"]),
     pytest.param(
         "biotite.database.entrez",
         [],
diff --git a/tests/test_repr.py b/tests/test_repr.py
index f8bf319c4..7097ba072 100644
--- a/tests/test_repr.py
+++ b/tests/test_repr.py
@@ -20,6 +20,7 @@
 )
 from biotite.sequence.align import Alignment, SubstitutionMatrix
 from biotite.structure import Atom
+from biotite.structure.alphabet import I3DSequence
 
 __author__ = "Maximilian Greil"
 
@@ -32,6 +33,7 @@
         ProteinSequence("BIQTITE"),
         Alphabet(["X", "Y", "Z"]),
         GeneralSequence(Alphabet(["X", 42, False]), ["X", 42, "X"]),
+        I3DSequence("ACDE"),
         LetterAlphabet(["X", "Y", "Z"]),
         Location(98, 178),
         Feature("CDS", [Location(98, 178)], qual={"gene": "test1"}),