Demo¶

Here we provide a no-brainer demo script for lipid interaction analysis using PyLipID. This script works for versions later than 1.4. Please update PyLipID to the latest version
import numpy as np
import matplotlib.pyplot as plt
from pylipid.api import LipidInteraction
from pylipid.util import check_dir

##################################################################
##### This part needs changes according to your setting ##########
##################################################################
trajfile_list = ["run1/md.xtc", "run2/md.xtc"]
topfile_list = ["run1/md.gro", "run2/md.gro"]  # topology file is needed when trajectory format does not
                                               # provide topology information. See mdtraj.load() for more
                                               # information.
dt_traj = None  # the timestep of trajectories. Need to use this param when trajectories are in a format
                # with no timestep information. Not necessary for trajectory formats of e.g. xtc, trr.
stride = 1   # tell pylipid to analyze every stride-th frame. Can be used to save computation memory
             # and speed up the calculation.

lipid = "CHOL"   # residue name in the topology.
lipid_atoms = None  # all lipid atoms will be considered for interaction calculation.
cutoffs = [0.5, 0.8]  # dual-cutoff scheme for coarse-grained simulations. Single-cutoff scheme can be
                      # achieved by using the same value for two cutoffs.

nprot = 1   # if the simulation system has N copies of receptors, "nprot=N" will report interactions
            # averaged from the N copies, but "nprot=1" will ask PyLipID to report interaction for
            # each copy.

binding_site_size = 4  # binding site should contain at least four residues.

n_top_poses = 3     # write out num. of representative bound poses for each binding site.
n_clusters = "auto"  # cluster the bound poses for a binding site into num. of clusters. PyLipID
                     # will write out a pose conformation for each of the cluster. By default, i.e.
                     # "auto", PyLipID will use a density based clusterer to find possible clusters.

save_dir = None  # save at current working directory if it is None.
save_pose_format = "gro"  # format that poses are written in
save_pose_traj = True  # save all the bound poses in a trajectory for each binding site. The generated
                       # trajectories can take some disk space (up to a couple GB depending on your system).
save_pose_traj_format = "xtc"  # The format for the saved pose trajectories. Can take any format that is supported
                               # by mdtraj.

timeunit = "us"  # micro-sec. "ns" is nanosecond. Time unit used for reporting the results.
resi_offset = 0  # shift the residue index, useful for MARTINI models.

radii = None  # Radii of protein atoms/beads. In the format of python dictionary {atom_name: radius}
              # Used for calculation of binding site surface area. The van der waals radii of common atoms were
              # defined by mdtraj (https://github.com/mdtraj/mdtraj/blob/master/mdtraj/geometry/sasa.py#L56).
              # The radii of MARTINI 2.2 beads were included in PyLipID.

pdb_file_to_map = None   # if a pdb coordinate of the receptor is provided, a python script
                         # "show_binding_site_info.py" will be generated which maps the binding
                         # site information to the structure in PyMol. As PyMol cannot recognize
                         # coarse-grained structures, an atomistic structure of the receptor is needed.

fig_format = "pdf"  # format for all pylipid produced figures. Allow for formats that are supported by
                    # matplotlib.pyplot.savefig().

num_cpus = None  # the number of cpu to use when functions are using multiprocessing. By default,
                 # i.e. None, the functions will use up all the cpus available. This can use up all the memory in
                 # some cases.

#####################################
###### no changes needed below ######
#####################################

#### calculate lipid interactions
li = LipidInteraction(trajfile_list, topfile_list=topfile_list, cutoffs=cutoffs, lipid=lipid,
                      lipid_atoms=lipid_atoms, nprot=1, resi_offset=resi_offset,
                      timeunit=timeunit, save_dir=save_dir, stride=stride, dt_traj=dt_traj)
li.collect_residue_contacts()
li.compute_residue_duration(residue_id=None)
li.compute_residue_occupancy(residue_id=None)
li.compute_residue_lipidcount(residue_id=None)
li.show_stats_per_traj(write_log=True, print_log=True)
li.compute_residue_koff(residue_id=None, plot_data=True, fig_close=True,
                        fig_format=fig_format, num_cpus=num_cpus)
li.compute_binding_nodes(threshold=binding_site_size, print_data=False)
if len(li.node_list) == 0:
    print("*"*50)
    print("No binding site detected! Skip analysis for binding sites.")
    print("*"*50)
else:
    li.compute_site_duration(binding_site_id=None)
    li.compute_site_occupancy(binding_site_id=None)
    li.compute_site_lipidcount(binding_site_id=None)
    li.compute_site_koff(binding_site_id=None, plot_data=True, fig_close=True,
                         fig_format=fig_format, num_cpus=num_cpus)
    pose_traj, pose_rmsd_data = li.analyze_bound_poses(binding_site_id=None, pose_format=save_pose_format,
                                                       n_top_poses=n_top_poses, n_clusters=n_clusters,
                                                       fig_format=fig_format, num_cpus=num_cpus)
    # save pose trajectories
    if save_pose_traj:
        for bs_id in pose_traj.keys():
            pose_traj[bs_id].save("{}/Bound_Poses_{}/Pose_traj_BSid{}.{}".format(li.save_dir, li.lipid, bs_id,
                                                                          save_pose_traj_format))
    del pose_traj  # save memory space
    surface_area_data = li.compute_surface_area(binding_site_id=None, radii=radii, fig_format=fig_format)
    data_dir = check_dir(li.save_dir, "Dataset_{}".format(li.lipid))
    pose_rmsd_data.to_csv("{}/Pose_RMSD_data.csv".format(data_dir), index=False, header=True)
    surface_area_data.to_csv("{}/Surface_Area_data.csv".format(data_dir), index=True, header=True)
    li.write_site_info(sort_residue="Residence Time")

if pdb_file_to_map is not None:
    li.save_pymol_script(pdb_file_to_map)

#### write and save data
for item in ["Dataset", "Duration", "Occupancy", "Lipid Count", "CorrCoef"]:
    li.save_data(item=item)
for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
    li.save_coordinate(item=item)
for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
    li.plot(item=item, fig_close=True, fig_format=fig_format)
    li.plot_logo(item=item, fig_close=True, fig_format=fig_format)

#### plot binding site comparison.
if len(li.node_list) > 0:
    for item in ["Duration BS", "Occupancy BS"]:
        li.save_data(item=item)

        ylabel_timeunit = 'ns' if li.timeunit == "ns" else r"$\mu$s"
        ylabel_dict = {"Residence Time": "Residence Time ({})".format(ylabel_timeunit),
                       "Duration": "Duration ({})".format(ylabel_timeunit),
                       "Occupancy": "Occuoancy (100%)",
                       "Lipid Count": "Lipid Count (num.)"}

        # plot No. 1
        binding_site_IDs = np.sort(
                 [int(bs_id) for bs_id in li.dataset["Binding Site ID"].unique() if bs_id != -1])
        for item in ["Residence Time", "Duration", "Occupancy", "Lipid Count"]:
            item_values = np.array(
                      [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site {}".format(item)].unique()[0]
                       for bs_id in binding_site_IDs])
            fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
            ax.scatter(np.arange(len(item_values)), np.sort(item_values)[::-1], s=50, color="red")
            ax.set_xticks(np.arange(len(item_values)))
            sorted_index = np.argsort(item_values)[::-1]
            ax.set_xticklabels(binding_site_IDs[sorted_index])
            ax.set_xlabel("Binding Site ID", fontsize=12)
            ax.set_ylabel(ylabel_dict[item], fontsize=12)
            for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
                plt.setp(label, fontsize=12, weight="normal")
            plt.tight_layout()
            plt.savefig("{}/{}_{}_v_binding_site.{}".format(li.save_dir, li.lipid, "_".join(item.split()), fig_format),
                        dpi=200)
            plt.close()

        # plot No. 2
        binding_site_IDs_RMSD = np.sort([int(bs_id) for bs_id in binding_site_IDs
                                        if f"Binding Site {bs_id}" in pose_rmsd_data.columns])
        RMSD_averages = np.array(
                     [pose_rmsd_data[f"Binding Site {bs_id}"].dropna(inplace=False).mean()
                      for bs_id in binding_site_IDs_RMSD])
        fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
        ax.scatter(np.arange(len(RMSD_averages)), np.sort(RMSD_averages)[::-1], s=50, color="red")
        ax.set_xticks(np.arange(len(RMSD_averages)))
        sorted_index = np.argsort(RMSD_averages)[::-1]
        ax.set_xticklabels(binding_site_IDs_RMSD[sorted_index])
        ax.set_xlabel("Binding Site ID", fontsize=12)
        ax.set_ylabel("RMSD (nm)", fontsize=12)
        for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=12, weight="normal")
        plt.tight_layout()
        plt.savefig("{}/{}_RMSD_v_binding_site.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
        plt.close()

        # plot No. 3
        surface_area_averages = np.array(
                       [surface_area_data["Binding Site {}".format(bs_id)].dropna(inplace=False).mean()
                        for bs_id in binding_site_IDs])
        fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
        ax.scatter(np.arange(len(surface_area_averages)), np.sort(surface_area_averages)[::-1], s=50, color="red")
        ax.set_xticks(np.arange(len(surface_area_averages)))
        sorted_index = np.argsort(surface_area_averages)[::-1]
        ax.set_xticklabels(binding_site_IDs[sorted_index])
        ax.set_xlabel("Binding Site ID", fontsize=12)
        ax.set_ylabel(r"Surface Area (nm$^2$)", fontsize=12)
        for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=12, weight="normal")
        plt.tight_layout()
        plt.savefig("{}/{}_surface_area_v_binding_site.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
        plt.close()

        # plot No. 4
        res_time_BS = np.array(
                  [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site Residence Time"].unique()[0]
                   for bs_id in binding_site_IDs_RMSD])
        fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
        ax.scatter(res_time_BS, RMSD_averages, s=50, color="red")
        ax.set_xlabel(ylabel_dict["Residence Time"], fontsize=12)
        ax.set_ylabel("RMSD (nm)", fontsize=12)
        for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=12, weight="normal")
        plt.tight_layout()
        plt.savefig("{}/{}_Residence_Time_v_RMSD.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
        plt.close()

        # plot No. 5
        res_time_BS = np.array(
                  [li.dataset[li.dataset["Binding Site ID"]==bs_id]["Binding Site Residence Time"].unique()[0]
                   for bs_id in binding_site_IDs])
        fig, ax = plt.subplots(1, 1, figsize=(len(li.node_list)*0.5, 2.6))
        ax.scatter(res_time_BS, surface_area_averages, s=50, color="red")
        ax.set_xlabel(ylabel_dict["Residence Time"], fontsize=12)
        ax.set_ylabel(r"Surface Area (nm$^2$)", fontsize=12)
        for label in ax.xaxis.get_ticklabels()+ax.yaxis.get_ticklabels():
            plt.setp(label, fontsize=12, weight="normal")
        plt.tight_layout()
        plt.savefig("{}/{}_Residence_Time_v_surface_area.{}".format(li.save_dir, li.lipid, fig_format), dpi=200)
        plt.close()