# Source code for graphein.protein.features.utils
"""Utility functions to work with graph-level features."""
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
import networkx as nx
import numpy as np
import pandas as pd
def convert_graph_dict_feat_to_series(
    G: nx.Graph, feature_name: str
) -> nx.Graph:
    """
    Replace a graph-level feature with a ``pd.Series`` built from its current value.

    Some graph-level features are produced as dictionaries; wrapping them in a
    ``pd.Series`` gives every such feature a common container type. The graph is
    modified in place and the same object is returned.

    :param G: nx.Graph containing ``G.graph[feature_name]`` (``Dict[Any, Any]``).
    :type G: nx.Graph
    :param feature_name: Name of the graph-level feature to convert to a ``pd.Series``.
    :type feature_name: str
    :return: The same nx.Graph, now with ``G.graph[feature_name]`` stored as a ``pd.Series``.
    :rtype: nx.Graph
    """
    graph_attrs = G.graph
    # Overwrite the entry in place: the key stays the same, only the container changes.
    graph_attrs[feature_name] = pd.Series(graph_attrs[feature_name])
    return G
def aggregate_graph_feature_over_chains(
    G: nx.Graph, feature_name: str, aggregation_type: str
) -> nx.Graph:
    """
    Aggregate a per-chain graph-level feature into a single value.

    For every chain ID ``c`` in ``G.graph["chain_ids"]`` the value stored under
    ``G.graph[f"{feature_name}_{c}"]`` is collected, the collected values are
    reduced with the NumPy function matching ``aggregation_type``, and the result
    is stored under ``G.graph[f"{feature_name}_{aggregation_type}"]``.
    E.g. sums/averages/min/max molecular weights over the chains.

    :param G: nx.Graph of protein containing chain-specific features and ``G.graph["chain_ids"]``.
    :type G: nx.Graph
    :param feature_name: Name of the feature to aggregate (prefix of the per-chain keys).
    :type feature_name: str
    :param aggregation_type: Type of aggregation to perform (``"min"``, ``"max"``, ``"sum"``, ``"mean"``).
    :type aggregation_type: str
    :raises NameError: If ``aggregation_type`` is not one of ``"min"``, ``"max"``, ``"sum"``, ``"mean"``.
    :return: The same nx.Graph with the new aggregated feature ``G.graph[f"{feature_name}_{aggregation_type}"]``.
    :rtype: nx.Graph
    """
    # Ordered (label, reducer) pairs, scanned with ``==`` so that any value which
    # matches none of the labels ends up in the NameError branch below.
    reducers = (
        ("mean", np.mean),
        ("max", np.max),
        ("sum", np.sum),
        ("min", np.min),
    )
    reducer = next(
        (fn for label, fn in reducers if aggregation_type == label), None
    )
    if reducer is None:
        raise NameError(
            "Unsupported aggregation type. Please use mean, max, sum or min."
        )

    # One entry per chain, in the order given by ``chain_ids``.
    per_chain_values = [
        G.graph[f"{feature_name}_{chain}"] for chain in G.graph["chain_ids"]
    ]
    G.graph[f"{feature_name}_{aggregation_type}"] = reducer(per_chain_values)
    return G