"""Base Config object for use with Protein Graph Construction."""
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
from __future__ import annotations
from functools import partial
from pathlib import Path
from typing import Any, Callable, List, Optional, Union
from deepdiff import DeepDiff
from pydantic import BaseModel
from typing_extensions import Literal
from graphein.protein.edges.distance import add_peptide_bonds
from graphein.protein.features.nodes.amino_acid import meiler_embedding
from graphein.utils.config import PartialMatchOperator, PathMatchOperator
class DSSPConfig(BaseModel):
    """
    Config object holding the reference to the ``DSSP`` executable.

    :param executable: Reference to the ``DSSP`` executable. Defaults to ``"mkdssp"``.
        (Presumably a command name on ``PATH`` or a path to the binary - confirm against the code that invokes it.)
    :type executable: str
    """

    executable: str = "mkdssp"
# Atom names are packed several to a row purely for compactness; the order of
# the entries is unchanged.
# fmt: off
GraphAtoms = Literal[
    "N", "CA", "C", "O", "CB", "OG", "CG", "CD1",
    "CD2", "CE1", "CE2", "CZ", "OD1", "ND2", "CG1", "CG2",
    "CD", "CE", "NZ", "OD2", "OE1", "NE2", "OE2", "OH",
    "NE", "NH1", "NH2", "OG1", "SD", "ND1", "SG", "NE1",
    "CE3", "CZ2", "CZ3", "CH2", "OXT",
]
"""Allowable atom types for nodes in the graph."""
# fmt: on

# Node-granularity keywords accepted in addition to the atom names above.
GranularityOpts = Literal["atom", "centroids"]
"""Allowable granularity options for nodes in the graph."""
class ProteinGraphConfig(BaseModel):
    """
    Config Object for Protein Structure Graph Construction.

    If you encounter a problematic structure, perusing https://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm
    may provide some additional insight. PDBs are notoriously troublesome and this is an excellent overview.

    :param granularity: Controls the granularity of the graph construction. ``"atom"`` builds an atomic-scale graph where
        nodes are constituent atoms. Residue-level graphs can be built by specifying which constituent atom should
        represent node positions (see :const:`~graphein.protein.config.GraphAtoms`). Additionally, ``"centroids"`` can be specified to
        compute the centre of gravity for a given atom (Specified in :const:`~graphein.protein.config.GranularityOpts`).
        Defaults to ``"CA"`` (alpha-Carbon).
    :type granularity: str (Union[graphein.protein.config.GraphAtoms, graphein.protein.config.GranularityOpts])
    :param keep_hets: Controls whether or not heteroatoms are removed from the PDB file. These are typically modified
        residues, bound ligands, crystallographic adjuvants, ions or water molecules.
        For more information, see: https://proteopedia.org/wiki/index.php/Hetero_atoms
        Defaults to ``False``.
    :type keep_hets: bool
    :param insertions: Controls whether or not insertions are allowed. Defaults to ``False``.
    :type insertions: bool
    :param pdb_dir: Specifies path to download protein structures into. Defaults to ``../examples/pdbs/``.
    :type pdb_dir: pathlib.Path
    :param verbose: Specifies verbosity of graph creation process. Defaults to ``False``.
    :type verbose: bool
    :param exclude_waters: Specifies whether or not water molecules are excluded from the structure.
        Defaults to ``True``.
    :type exclude_waters: bool
    :param deprotonate: Specifies whether or not to remove ``H`` atoms from the graph. Defaults to ``False``.
    :type deprotonate: bool
    :param protein_df_processing_functions: List of functions that take a ``pd.DataFrame`` and return a ``pd.DataFrame``.
        This allows users to define their own series of processing functions for the protein structure DataFrame and
        override the default sequencing of processing steps provided by Graphein. We refer users to our low-level API
        tutorial for more details. Defaults to ``None``.
    :type protein_df_processing_functions: Optional[List[Callable]]
    :param edge_construction_functions: List of functions that take an ``nx.Graph`` and return an ``nx.Graph`` with desired
        edges added. Prepared edge constructions can be found in :ref:`graphein.protein.edges`.
        Defaults to ``[add_peptide_bonds]``.
    :type edge_construction_functions: List[Union[Callable, str]]
    :param node_metadata_functions: List of functions that take an ``nx.Graph`` (presumably returning it with
        node-level features and metadata added, by analogy with ``graph_metadata_functions``).
        Defaults to ``[meiler_embedding]``.
    :type node_metadata_functions: List[Union[Callable, str]], optional
    :param edge_metadata_functions: List of functions that take an ``nx.Graph`` (presumably returning it with
        edge-level features and metadata added, by analogy with ``graph_metadata_functions``).
        Defaults to ``None``.
    :type edge_metadata_functions: List[Union[Callable, str]], optional
    :param graph_metadata_functions: List of functions that take an ``nx.Graph`` and return an ``nx.Graph`` with added
        graph-level features and metadata. Defaults to ``None``.
    :type graph_metadata_functions: List[Callable], optional
    :param get_contacts_config: Config object containing parameters for running ``GetContacts`` for computing intramolecular
        contact-based edges. Defaults to None.
    :type get_contacts_config: GetContactsConfig, optional
    :param dssp_config: Config Object containing reference to ``DSSP`` executable. Defaults to None.
        **NB** DSSP must be installed. See installation instructions: https://graphein.ai/getting_started/installation.html#optional-dependencies
    :type dssp_config: DSSPConfig, optional
    """

    granularity: Union[GraphAtoms, GranularityOpts] = "CA"
    keep_hets: bool = False
    insertions: bool = False
    # NOTE: relative, hard-coded default kept for backward compatibility;
    # prefer passing an explicit ``pdb_dir`` rather than relying on it.
    pdb_dir: Path = Path("../examples/pdbs/")
    verbose: bool = False
    exclude_waters: bool = True
    deprotonate: bool = False

    # Graph construction functions
    protein_df_processing_functions: Optional[List[Callable]] = None
    edge_construction_functions: List[Union[Callable, str]] = [
        add_peptide_bonds
    ]
    node_metadata_functions: Optional[List[Union[Callable, str]]] = [
        meiler_embedding
    ]
    edge_metadata_functions: Optional[List[Union[Callable, str]]] = None
    graph_metadata_functions: Optional[List[Callable]] = None

    # External Dependency configs
    # NOTE(review): ``GetContactsConfig`` is neither defined nor imported in the
    # visible part of this module. With postponed annotations the name must be
    # resolvable in this module's namespace for the field to be usable -
    # confirm it is defined/imported elsewhere in the file.
    get_contacts_config: Optional[GetContactsConfig] = None
    dssp_config: Optional[DSSPConfig] = None

    def __eq__(self, other: Any) -> bool:
        """
        Overwrites the BaseModel ``__eq__`` function in order to check more specific cases (like partial functions).

        :param other: Object to compare this config against.
        :type other: Any
        :return: For another ``ProteinGraphConfig``, ``True`` when ``DeepDiff`` (run with the partial- and
            path-matching operators) reports no differences. For any other object, the result of comparing
            ``self.dict()`` with it.
        :rtype: bool
        """
        if not isinstance(other, ProteinGraphConfig):
            # Anything that is not a ProteinGraphConfig is compared against
            # the plain dict form of this config.
            return self.dict() == other

        # Custom operators take over the comparison of ``functools.partial``
        # and ``pathlib.Path`` values found inside the two configs.
        differences = DeepDiff(
            self,
            other,
            custom_operators=[
                PartialMatchOperator(types=[partial]),
                PathMatchOperator(types=[Path]),
            ],
        )
        return differences == {}
class ProteinMeshConfig(BaseModel):
    """
    Config object for parameters relating to Protein Mesh construction with ``PyMol``.

    **NB** PyMol must be installed. See: https://graphein.ai/getting_started/installation.html#optional-dependencies

    :param pymol_command_line_options: String of CLI args for running PyMol.
        See: https://pymolwiki.org/index.php/Command_Line_Options. Defaults to ``"-cKq"``.
    :type pymol_command_line_options: str, optional
    :param pymol_commands: List of Commands passed to PyMol in surface construction.
        Defaults to ``["show surface"]``.
    :type pymol_commands: List[str], optional
    """

    pymol_command_line_options: Optional[str] = "-cKq"
    pymol_commands: Optional[List[str]] = ["show surface"]