Source code for pylipid.func.kinetics

##############################################################################
# PyLipID: A python module for analysing protein-lipid interactions
#
# Author: Wanling Song
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
##############################################################################

"""This module contains functions for calculating interaction residence time and koff.
"""
import warnings
import numpy as np
from scipy.optimize import curve_fit
from ..plot import plot_koff


__all__ = ["cal_koff", "cal_survival_func", "calculate_koff_wrapper"]


[docs]def cal_koff(durations, t_total, timestep, nbootstrap=10, initial_guess=[1., 1., 1., 1.], cap=True):
    r"""Calculate residence time and koff.

    This function calculates the normalized survival time correlation function of the given list of durations, and fits t
    he survival function to a bi-exponential curve [1]_.

    The survival time correlation function σ(t) is calculated as follow

    .. math::

        \sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)

    where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts and
    :math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the contact of
    lipid j lasts from time ν to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are calculated for every
    value of t from 0 to T ns, for each time step of the trajectories, and normalized by dividing by :math:`\sigma(t)`,
    so that the survival time-correlation function has value 1 at t = 0.

    The normalized survival function is then fitted to a biexponential to model the long and short decays of
    lipid relaxation:

    .. math::
        \sigma(t) \sim A e^{-k_{1} t}+B e^{-k_{2} t}\left(k_{1} \leq k_{2}\right)

    This function then takes :math:`k_{1}` as the the dissociation :math:`k_{off}`, and calculates the residence time as
    :math:`\tau=1 / k_{o f f}`.

    This function also measures the :math:`r^{2}` of the biexponential fitting to the survival function to show the
    quality of the :math:`k_{off}` estimation. In addition, it bootstraps the contact durations and measures the
    :math:`k_{off}` of the bootstrapped data, to report how well lipid contacts are sampled from simulations. The
    lipid contact sampling, the curve-fitting and the bootstrap results can be conveniently checked via the
    :math:`k_{off}` plot. The :math:`r^{2}`, :math:`\sigma(t)` and bootstrapped data are stored in the returned data
    ``properties``.

    Parameters
    ----------
    durations : array_like
            A list of interaction durations

    t_total : scalar
            The duration, or the longest if using multiple simulations of different durations, of the
            simulation trajectories. Should be in the same time unit as durations.

    timestep : scalar
            :math:`\Delta t` of the survival function :math:`\sigma`. Often takes the time step of the simulation
            trajectories or multiples of the trajectory time step. Should be in the same time unit as durations.

    nbootstrap : int, optional, default=10
            Number of bootstrapping. The default is 10.

    initial_quess : list, optional, default=(1., 1., 1., 1.)
            The initial guess for fitting of a bi-exponential curve to the survival function. Used by
            `scipy.optimize.curve_fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html>`_.

    cap : bool, optional, default=True
            Cap the returned residence time to ``t_total``. This is useful for cases of poor samplings where the curve fitting
            may be bad and the calculated residence times may be unrealistically large.

    Returns
    ----------
    koff : scalar
            The calculated koff. In the unit of :math:`{timeunit^{-1}}` in which :math:`{timeunit}` is the same as
            what is used in ``durations``.

    res_time : scalar
            The calculated residence time. In the same time unit as used by ``durations``.

    properties : dict
            A dictionary of all the computed values, including the original and bootstrapped koffs, residence times, ks
            of the bi-expo curve :math:`y=A*e^{(-k_1*x)}+B*e^{(-k_2*x)}` and :math:`R^2`.

    See also
    -----------
    pylipid.plot.plot_koff
            Plotting function for interaction durations and the calculated survival function.
    pylipid.api.LipidInteraction.compute_residue_koff
        Calculate interaction koff and residence time for residues.
    pylipid.api.LipidInteraction.compute_site_koff
        Calculate interaction koff and residence time for binding sites.

    References
    -----------
    .. [1] García, Angel E.Stiller, Lewis. Computation of the mean residence time of water in the hydration shells
           of biomolecules. 1993. Journal of Computational Chemistry.

    """
    # calculate original residence time
    delta_t_list = np.arange(0, t_total, timestep)
    survival_func = cal_survival_func(durations, np.max(t_total), delta_t_list)
    survival_rates = np.array([survival_func[delta_t] for delta_t in delta_t_list])
    res_time, _, r_squared, params = _curve_fitting(survival_func, delta_t_list, initial_guess)
    if cap and res_time > t_total:
        res_time = t_total
    n_fitted = _bi_expo(np.array(delta_t_list), *params)
    r_squared = 1 - np.sum((np.nan_to_num(n_fitted) - np.nan_to_num(survival_rates)) ** 2) / np.sum(
        (survival_rates - np.mean(survival_rates)) ** 2)
    ks = [abs(k) for k in params[:2]]
    ks.sort() # the smaller k is considered as koff

    # calculate bootstrapped residence time
    if nbootstrap > 0:
        duration_boot_set = [np.random.choice(durations, size=len(durations)) for dummy in range(nbootstrap)]
        ks_boot_set = []
        r_squared_boot_set = []
        survival_rates_boot_set = []
        n_fitted_boot_set = []
        for duration_boot in duration_boot_set:
            survival_func_boot = cal_survival_func(duration_boot, np.max(t_total), delta_t_list)
            survival_rates_boot = np.array([survival_func_boot[delta_t] for delta_t in delta_t_list])
            _, _, r_squared_boot, params_boot = _curve_fitting(survival_func_boot, delta_t_list,
                                                                                                initial_guess)
            n_fitted_boot = _bi_expo(np.array(delta_t_list), *params_boot)
            r_squared_boot = 1 - np.sum((np.nan_to_num(n_fitted_boot) - np.nan_to_num(survival_rates_boot)) ** 2) / np.sum(
                (survival_rates_boot - np.mean(survival_rates_boot)) ** 2)
            ks_boot = [abs(k) for k in params_boot[:2]]
            ks_boot.sort()
            ks_boot_set.append(ks_boot)
            r_squared_boot_set.append(r_squared_boot)
            survival_rates_boot_set.append(survival_rates_boot)
            n_fitted_boot_set.append(n_fitted_boot)
    else:
        ks_boot_set = [0]
        r_squared_boot_set = [0]
        survival_rates_boot_set = [0]
        n_fitted_boot_set = [0]

    properties = {"ks": ks, "res_time": res_time, "delta_t_list": delta_t_list,
                  "survival_rates": survival_rates, "survival_rates_boot_set": survival_rates_boot_set,
                  "n_fitted": n_fitted, "n_fitted_boot_set": n_fitted_boot_set,
                  "ks_boot_set": ks_boot_set,
                  "r_squared": r_squared, "r_squared_boot_set": r_squared_boot_set}

    return ks[0], res_time, properties


[docs]def cal_survival_func(durations, t_total, delta_t_list):
    r"""Compute the normalised survival function.

    Calculate the normalized survival time correlation function of the given list of durations.
    The survival time correlation function σ(t) is calculated as follow

    .. math::

        \sigma(t) = \frac{1}{N_{j}} \frac{1}{T-t} \sum_{j=1}^{N_{j}} \sum_{v=0}^{T-t}\tilde{n}_{j}(v, v+t)

    where T is the length of the simulation trajectory, :math:`N_{j}` is the total number of lipid contacts and
    :math:`\sum_{v=0}^{T-t} \tilde{n}_{j}(v, v+t)` is a binary function that takes the value 1 if the contact of
    lipid j lasts from time ν to time v+t and 0 otherwise. The values of :math:`\sigma(t)` are calculated for every
    value of t from 0 to T ns, for each time step of the trajectories, and normalized by dividing by :math:`\sigma(t)`,
    so that the survival time-correlation function has value 1 at t = 0.

    Parameters
    -----------
    durations : array_like
            A list of contact durations.

    t_total : scalar
            The duration or length, or the longest if using multiple simulations of different durations/lengths, of the
            simulation trajectories. Should be in the same time unit as durations.

    delta_t_list : array_like
            The list of :math:`\Delta t` for the survival function :math:`\sigma` to check the interaction survival rate.

    Returns
    -----------
    survival_func : dict
            The survival function :math:`\sigma` stored in a dictionary {delta_t: survival rate}.

    See also
    -----------
    pylipid.func.cal_koff
        Calculate residence time and koff.

    """
    num_of_contacts = len(durations)
    survival_func = {}
    for delta_t in delta_t_list:
        if delta_t == 0:
            survival_func[delta_t] = 1
            survival_func0 = float(sum([res_time - delta_t for res_time in durations if res_time >= delta_t])) / \
                     ((t_total - delta_t) * num_of_contacts)
        else:
            try:
                survival_func[delta_t] = float(sum([res_time - delta_t for res_time in durations if res_time >= delta_t])) / \
                                 ((t_total - delta_t) * num_of_contacts * survival_func0)
            except ZeroDivisionError:
                survival_func[delta_t] = 0
    return survival_func


def _curve_fitting(survival_func, delta_t_list, initial_guess):
    """Fit the exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
    survival_rates = np.nan_to_num([survival_func[delta_t] for delta_t in delta_t_list]) # y
    try:
        popt, pcov = curve_fit(_bi_expo, np.array(delta_t_list), np.array(survival_rates), p0=initial_guess, maxfev=100000)
        n_fitted = _bi_expo(np.array(delta_t_list, dtype=np.float128), *popt)
        r_squared = 1 - np.sum((np.nan_to_num(n_fitted) -
                                np.nan_to_num(survival_rates))**2)/np.sum((survival_rates - np.mean(survival_rates))**2)
        ks = [abs(k) for k in popt[:2]]
        koff = np.min(ks)
        res_time = 1/koff
    except RuntimeError:
        koff = 0
        res_time = 0
        r_squared = 0
        popt = [0, 0, 0, 0]
    return res_time, koff, r_squared, popt


def _bi_expo(x, k1, k2, A, B):
    """The exponential curve :math:`y=Ae^{-k_1\Delta t}+Be^{-k_2\Delta t}`"""
    return A*np.exp(-k1*x) + B*np.exp(-k2*x)


[docs]def calculate_koff_wrapper(durations, title, fn, t_total=None, timestep=1, nbootstrap=10,
                           initial_guess=[1., 1., 1., 1.], plot_data=True, timeunit="us", fig_close=True):
    """Wrapper function that calculates koff and plot koff. """
    if np.sum(durations) == 0:
        koff = 0
        res_time = 0
        r_squared = 0
        koff_boot = 0
        r_squared_boot = 0
    else:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            koff, res_time, properties = cal_koff(durations, t_total, timestep, nbootstrap, initial_guess)
        r_squared = properties["r_squared"]
        koff_boot = np.mean(properties["ks_boot_set"], axis=0)[0]
        r_squared_boot = np.mean(properties["r_squared_boot_set"])
        if plot_data:
            text = _format_koff_text(properties, timeunit)
            plot_koff(durations, properties["delta_t_list"], properties["survival_rates"],
                      properties["n_fitted"], survival_rates_bootstraps=properties["survival_rates_boot_set"],
                      fig_fn=fn, title=title, timeunit=timeunit, t_total=t_total, text=text, fig_close=fig_close)
    return koff, res_time, r_squared, koff_boot, r_squared_boot


def _format_koff_text(properties, timeunit):
    """Format text for koff plot. """
    tu = "ns" if timeunit == "ns" else r"$\mu$s"
    text = "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off1}}$", properties["ks"][0], tu)
    text += "{:18s} = {:.3f} {:2s}$^{{-1}} $\n".format("$k_{{off2}}$", properties["ks"][1], tu)
    text += "{:14s} = {:.4f}\n".format("$R^2$", properties["r_squared"])
    ks_boot_avg = np.mean(properties["ks_boot_set"], axis=0)
    cv_avg = 100 * np.std(properties["ks_boot_set"], axis=0) / np.mean(properties["ks_boot_set"], axis=0)
    text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off1, boot}}$", ks_boot_avg[0],
                                                                 tu, cv_avg[0])
    text += "{:18s} = {:.3f} {:2s}$^{{-1}}$ ({:3.1f}%)\n".format("$k_{{off2, boot}}$", ks_boot_avg[1],
                                                                 tu, cv_avg[1])
    text += "{:14s} = {:.4f}\n".format("$R^2$$_{{boot}}$", np.mean(properties["r_squared_boot_set"]))
    text += "{:18s} = {:.3f} {:2s}".format("$Res. Time$", properties["res_time"], tu)
    return text