Source code for pylipid.util.pymol_script

##############################################################################
# PyLipID: A python module for analysing protein-lipid interactions
#
# Author: Wanling Song
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
##############################################################################

"""This module contains functions that write PyMol scripts."""

import os
from shutil import copyfile

__all__ = ["write_pymol_script"]


[docs]def write_pymol_script(fname, pdb_fname, data_fname, lipid, n_binding_site):
    """Write Python script that opens a PyMol session with binding site information.

    This function will generate a Python script which opens a PyMol session in which the binding site information
    stored in the interaction data file (a csv file provided by ``data_fname``) is mapped onto the receptor structure
    (loaded from a PDB coordinate file provided by ``pdb_fname``). In this PyMol session, residues from the same binding
    site are colored in the same color and shown in spheres with scales corresponding to their residence times.

    The PDB coordinate of the receptor need to have the same topology as in the simulations (i.e. the same as shown in
    the ``data_fname``). A warning will show up and break the PyMol seesion, if residue names are different or residue
    indices are different.

    As PyMol only recognizes atomistic structures, the PDB coordinates of the receptor (provided by ``pdb_fname``) needs
    to be an atomistic structure. For coarse-grained simulation results, this ``pdb_fname`` needs to be a PDB coordinates
    before the coarse-graining step.

    Parameters
    ----------
    fname : str
        The filename of the PyMol script to be written.
    pdb_fname : str
        The PDB coordinate file of the receptor.
    data_fname : str
        The csv file of the interaction data.
    lipid : str
        Lipid residue name
    n_binding_site : int
        Number of binding site detected (shown in the csv file).

    """
    script_fname = os.path.abspath(fname)
    save_dir, _ = os.path.split(script_fname)
    pdb_new_fname = os.path.join(save_dir, os.path.basename(pdb_fname))
    copyfile(os.path.abspath(pdb_fname), pdb_new_fname)
    data_fname = os.path.abspath(data_fname)

    text = """
import numpy as np
import re
import pymol
from pymol import cmd
pymol.finish_launching()

########## files to process ##########
csv_file = "{CSV_FILE}"
pdb_file = "{PDB}"

########## set the sphere scales to corresponding value ##########
value_to_show = "Residence Time"

###### reading data from csv file ##########
num_of_binding_site = {N_BINDING_SITE}

with open(csv_file, "r") as f:
    data_lines = f.readlines()

column_names = data_lines[0].strip().split(",")
for column_idx, column_name in enumerate(column_names):
    if column_name == "Residue":
        column_id_residue_list = column_idx
    elif column_name == "Residue ID":
        column_id_residue_index = column_idx
    elif column_name == "Binding Site ID":
        column_id_BS = column_idx
    elif column_name == value_to_show:
        column_id_value_to_show = column_idx

residue_list = []
residue_rank_set = []
binding_site_identifiers = []
values_to_show = []
for line in data_lines[1:]:
    data_list = line.strip().split(",")
    residue_list.append(data_list[column_id_residue_list])
    residue_rank_set.append(data_list[column_id_residue_index])
    binding_site_identifiers.append(float(data_list[column_id_BS]))
    values_to_show.append(data_list[column_id_value_to_show])

############## read information from pdb file ##########
with open(pdb_file, "r") as f:
    pdb_lines = f.readlines()
residue_identifiers = []
for line in pdb_lines:
    line_stripped = line.strip()
    if line_stripped[:4] == "ATOM":
        identifier = (line_stripped[22:26].strip(), line_stripped[17:20].strip(), line_stripped[21].strip())
##                           residue index,              resname,                     chain id
        if len(residue_identifiers) == 0:
            residue_identifiers.append(identifier)
        elif identifier != residue_identifiers[-1]:
            residue_identifiers.append(identifier)

######### set sphere scales ###############
values_to_show = np.array(values_to_show, dtype=float)
MIN = np.percentile(np.unique(values_to_show), 5)
MAX = np.percentile(np.unique(values_to_show), 100)
X = (values_to_show - np.percentile(np.unique(values_to_show), 50))/(MAX - MIN)
SCALES = 1/(0.5 + np.exp(-X * 5))

######## some pymol settings #########
cmd.set("retain_order", 1)
cmd.set("cartoon_oval_length", 1.0)
cmd.set("cartoon_oval_width", 0.3)
cmd.set("cartoon_color", "white")
cmd.set("stick_radius", 0.35)

##################################
cmd.load(pdb_file, "Prot_{LIPID}")
prefix = "Prot_{LIPID}"
cmd.hide("everything")
cmd.show("cartoon", prefix)
cmd.center(prefix)
cmd.orient(prefix)
colors = np.array([np.random.choice(np.arange(256, dtype=float), size=3) for dummy in range(num_of_binding_site)])
colors /= 255.0
            """.format(**{"SAVE_DIR": save_dir, "LIPID": lipid, "N_BINDING_SITE": n_binding_site,
                          "PDB": pdb_new_fname, "CSV_FILE": data_fname})

    text += r"""
residue_list = np.array(residue_list, dtype=str)
residue_rank_set = np.array(residue_rank_set, dtype=int)
binding_site_identifiers = np.array(binding_site_identifiers, dtype=int)
residue_identifiers = list(residue_identifiers)
for bs_id in np.arange(num_of_binding_site):
    cmd.set_color("tmp_{}".format(bs_id), list(colors[bs_id]))
    for entry_id in np.where(binding_site_identifiers == bs_id)[0]:
        selected_residue = residue_list[entry_id]
        selected_residue_rank = residue_rank_set[entry_id]
        identifier_from_pdb = residue_identifiers[selected_residue_rank]
        if re.findall("[a-zA-Z]+$", selected_residue)[0] != identifier_from_pdb[1]:
            raise IndexError(
            "The {}th residue in the provided pdb file ({}{}) is different from that in the simulations ({})!".format(
                                                                                            entry_id+1,
                                                                                            identifier_from_pdb[0],
                                                                                            identifier_from_pdb[1],
                                                                                            selected_residue)
                                                                                            )
        if identifier_from_pdb[2] != " ":
            cmd.select("BSid{}_{}".format(bs_id, selected_residue), 
            "chain {} and resid {} and (not name C+O+N)".format(identifier_from_pdb[2], identifier_from_pdb[0]))
        else:
            cmd.select("BSid{}_{}".format(bs_id, selected_residue), 
            "resid {} and (not name C+O+N)".format(identifier_from_pdb[0]))
        cmd.show("spheres", "BSid{}_{}".format(bs_id, selected_residue))
        cmd.set("sphere_scale", SCALES[entry_id], selection="BSid{}_{}".format(bs_id, selected_residue))
        cmd.color("tmp_{}".format(bs_id), "BSid{}_{}".format(bs_id, selected_residue))
    cmd.group("BSid{}".format(bs_id), "BSid{}_*".format(bs_id))

            """
    with open(fname, "w") as f:
        f.write(text)

    return