##############################################################################
# PyLipID: A python module for analysing protein-lipid interactions
#
# Author: Wanling Song
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
##############################################################################
"""This module contains functions for calculating interaction residence time and koff.
"""
import warnings
import numpy as np
from scipy.optimize import curve_fit
from ..plot import plot_koff
__all__ = ["cal_koff", "cal_survival_func", "calculate_koff_wrapper"]
[docs]def cal_koff(durations, t_total, timestep, nbootstrap=10, initial_guess=[1., 1., 1., 1.], cap=True):
r"""Calculate residence time and koff.
This function calculates the normalized survival time correlation function of the given list of durations, and fits t
he survival function to a bi-exponential curve [1]_.
The survival time correlation function σ(t) is calculated as follow
.. math::
\sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)
where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts and
:math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the contact of
lipid j lasts from time ν to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are calculated for every
value of t from 0 to T ns, for each time step of the trajectories, and normalized by dividing by :math:`\sigma(t)`,
so that the survival time-correlation function has value 1 at t = 0.
The normalized survival function is then fitted to a biexponential to model the long and short decays of
lipid relaxation:
.. math::
\sigma(t) \sim A e^{-k_{1} t}+B e^{-k_{2} t}\left(k_{1} \leq k_{2}\right)
This function then takes :math:`k_{1}` as the the dissociation :math:`k_{off}`, and calculates the residence time as
:math:`\tau=1 / k_{o f f}`.
This function also measures the :math:`r^{2}` of the biexponential fitting to the survival function to show the
quality of the :math:`k_{off}` estimation. In addition, it bootstraps the contact durations and measures the
:math:`k_{off}` of the bootstrapped data, to report how well lipid contacts are sampled from simulations. The
lipid contact sampling, the curve-fitting and the bootstrap results can be conveniently checked via the
:math:`k_{off}` plot. The :math:`r^{2}`, :math:`\sigma(t)` and bootstrapped data are stored in the returned data
``properties``.
Parameters
----------
durations : array_like
A list of interaction durations
t_total : scalar
The duration, or the longest if using multiple simulations of different durations, of the
simulation trajectories. Should be in the same time unit as durations.
timestep : scalar
:math:`\Delta t` of the survival function :math:`\sigma`. Often takes the time step of the simulation
trajectories or multiples of the trajectory time step. Should be in the same time unit as durations.
nbootstrap : int, optional, default=10
Number of bootstrapping. The default is 10.
initial_quess : list, optional, default=(1., 1., 1., 1.)
The initial guess for fitting of a bi-exponential curve to the survival function. Used by
`scipy.optimize.curve_fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html>`_.
cap : bool, optional, default=True
Cap the returned residence time to ``t_total``. This is useful for cases of poor samplings where the curve fitting
may be bad and the calculated residence times may be unrealistically large.
Returns
----------
koff : scalar
The calculated koff. In the unit of :math:`{timeunit^{-1}}` in which :math:`{timeunit}` is the same as
what is used in ``durations``.
res_time : scalar
The calculated residence time. In the same time unit as used by ``durations``.
properties : dict
A dictionary of all the computed values, including the original and bootstrapped koffs, residence times, ks
of the bi-expo curve :math:`y=A*e^{(-k_1*x)}+B*e^{(-k_2*x)}` and :math:`R^2`.
See also
-----------
pylipid.plot.plot_koff
Plotting function for interaction durations and the calculated survival function.
pylipid.api.LipidInteraction.compute_residue_koff
Calculate interaction koff and residence time for residues.
pylipid.api.LipidInteraction.compute_site_koff
Calculate interaction koff and residence time for binding sites.
References
-----------
.. [1] García, Angel E.Stiller, Lewis. Computation of the mean residence time of water in the hydration shells
of biomolecules. 1993. Journal of Computational Chemistry.
"""
# calculate original residence time
delta_t_list = np.arange(0, t_total, timestep)
survival_func = cal_survival_func(durations, np.max(t_total), delta_t_list)
survival_rates = np.array([survival_func[delta_t] for delta_t in delta_t_list])
res_time, _, r_squared, params = _curve_fitting(survival_func, delta_t_list, initial_guess)
if cap and res_time > t_total:
res_time = t_total
n_fitted = _bi_expo(np.array(delta_t_list), *params)
r_squared = 1 - np.sum((np.nan_to_num(n_fitted) - np.nan_to_num(survival_rates)) ** 2) / np.sum(
(survival_rates - np.mean(survival_rates)) ** 2)
ks = [abs(k) for k in params[:2]]
ks.sort() # the smaller k is considered as koff
# calculate bootstrapped residence time
if nbootstrap > 0:
duration_boot_set = [np.random.choice(durations, size=len(durations)) for dummy in range(nbootstrap)]
ks_boot_set = []
r_squared_boot_set = []
survival_rates_boot_set = []
n_fitted_boot_set = []
for duration_boot in duration_boot_set:
survival_func_boot = cal_survival_func(duration_boot, np.max(t_total), delta_t_list)
survival_rates_boot = np.array([survival_func_boot[delta_t] for delta_t in delta_t_list])
_, _, r_squared_boot, params_boot = _curve_fitting(survival_func_boot, delta_t_list,
initial_guess)
n_fitted_boot = _bi_expo(np.array(delta_t_list), *params_boot)
r_squared_boot = 1 - np.sum((np.nan_to_num(n_fitted_boot) - np.nan_to_num(survival_rates_boot)) ** 2) / np.sum(
(survival_rates_boot - np.mean(survival_rates_boot)) ** 2)
ks_boot = [abs(k) for k in params_boot[:2]]
ks_boot.sort()
ks_boot_set.append(ks_boot)
r_squared_boot_set.append(r_squared_boot)
survival_rates_boot_set.append(survival_rates_boot)
n_fitted_boot_set.append(n_fitted_boot)
else:
ks_boot_set = [0]
r_squared_boot_set = [0]
survival_rates_boot_set = [0]
n_fitted_boot_set = [0]
properties = {"ks": ks, "res_time": res_time, "delta_t_list": delta_t_list,
"survival_rates": survival_rates, "survival_rates_boot_set": survival_rates_boot_set,
"n_fitted": n_fitted, "n_fitted_boot_set": n_fitted_boot_set,
"ks_boot_set": ks_boot_set,
"r_squared": r_squared, "r_squared_boot_set": r_squared_boot_set}
return ks[0], res_time, properties
[docs]def cal_survival_func(durations, t_total, delta_t_list):
r"""Compute the normalised survival function.
Calculate the normalized survival time correlation function of the given list of durations.
The survival time correlation function σ(t) is calculated as follow
.. math::
\sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)
where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts and
:math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the contact of
lipid j lasts from time ν to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are calculated for every
value of t from 0 to T ns, for each time step of the trajectories, and normalized by dividing by :math:`\sigma(t)`,
so that the survival time-correlation function has value 1 at t = 0.
Parameters
-----------
durations : array_like
A list of contact durations.
t_total : scalar
The duration or length, or the longest if using multiple simulations of different durations/lengths, of the
simulation trajectories. Should be in the same time unit as durations.
delta_t_list : array_like
The list of :math:`\Delta t` for the survival function :math:`\sigma` to check the interaction survival rate.
Returns
-----------
survival_func : dict
The survival function :math:`\sigma` stored in a dictionary {delta_t: survival rate}.
See also
-----------
pylipid.func.cal_koff
Calculate residence time and koff.
"""
num_of_contacts = len(durations)
survival_func = {}
for delta_t in delta_t_list:
if delta_t == 0:
survival_func[delta_t] = 1
survival_func0 = float(sum([res_time - delta_t for res_time in durations if res_time >= delta_t])) / \
((t_total - delta_t) * num_of_contacts)
else:
try:
survival_func[delta_t] = float(sum([res_time - delta_t for res_time in durations if res_time >= delta_t])) / \
((t_total - delta_t) * num_of_contacts * survival_func0)
except ZeroDivisionError:
survival_func[delta_t] = 0
return survival_func
def _curve_fitting(survival_func, delta_t_list, initial_guess):
"""Fit the exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list]) # y
try:
popt, pcov = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates), p0=initial_guess, maxfev=100000)
n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.float128), *popt)
r_squared = 1 - np.sum((np.nan_to_num(n_fitted) -
np.nan_to_num(survival_rates))**2)/np.sum((survival_rates - np.mean(survival_rates))**2)
ks = [abs(k) for k in popt[:2]]
koff = np.min(ks)
res_time = 1/koff
except RuntimeError:
koff = 0
res_time = 0
r_squared = 0
popt = [0, 0, 0, 0]
return res_time, koff, r_squared, popt
def _bi_expo(x, k1, k2, A, B):
"""The exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
return A*np.exp(-k1*x) + B*np.exp(-k2*x)
[docs]def calculate_koff_wrapper(durations, title, fn, t_total=None, timestep=1, nbootstrap=10,
initial_guess=[1., 1., 1., 1.], plot_data=True, timeunit="us", fig_close=True):
"""Wrapper function that calculates koff and plot koff. """
if np.sum(durations) == 0:
koff = 0
res_time = 0
r_squared = 0
koff_boot = 0
r_squared_boot = 0
else:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
koff, res_time, properties = cal_koff(durations, t_total, timestep, nbootstrap, initial_guess)
r_squared = properties["r_squared"]
koff_boot = np.mean(properties["ks_boot_set"], axis=0)[0]
r_squared_boot = np.mean(properties["r_squared_boot_set"])
if plot_data:
text = _format_koff_text(properties, timeunit)
plot_koff(durations, properties["delta_t_list"], properties["survival_rates"],
properties["n_fitted"], survival_rates_bootstraps=properties["survival_rates_boot_set"],
fig_fn=fn, title=title, timeunit=timeunit, t_total=t_total, text=text, fig_close=fig_close)
return koff, res_time, r_squared, koff_boot, r_squared_boot
def _format_koff_text(properties, timeunit):
"""Format text for koff plot. """
tu = "ns" if timeunit == "ns" else r"$\mu$s"
text = "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off1}}$", properties["ks"][0], tu)
text += "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off2}}$", properties["ks"][1], tu)
text += "{:14s} = {:.4f}\n".format("$R^2$", properties["r_squared"])
ks_boot_avg = np.mean(properties["ks_boot_set"], axis=0)
cv_avg = 100 * np.std(properties["ks_boot_set"], axis=0) / np.mean(properties["ks_boot_set"], axis=0)
text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off1, boot}}$", ks_boot_avg[0],
tu, cv_avg[0])
text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off2, boot}}$", ks_boot_avg[1],
tu, cv_avg[1])
text += "{:14s} = {:.4f}\n".format("$R^2$$_{{boot}}$", np.mean(properties["r_squared_boot_set"]))
text += "{:18s} = {:.3f} {:2s}".format("$Res. Time$", properties["res_time"], tu)
return text