Source code for pylipid.plot.plot1d

##############################################################################
# PyLipID: A python module for analysing protein-lipid interactions
#
# Author: Wanling Song
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
##############################################################################

"""This module contains functions that plot interactions as a function of residue index.
"""

import os
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import numpy as np
import pandas as pd
import logomaker

__all__ = ["plot_residue_data", "plot_residue_data_logo",
           "plot_binding_site_data", "plot_surface_area", "AxisIndex"]


[docs]def plot_residue_data(residue_index, interactions, gap=200, ylabel=None, fn=None, title=None, fig_close=False): """Plot interactions as a function of residue index Parameters ---------- residue_index : list Residue indices in an ascending order. If a residue index is smaller than its preceding one, the plotting function will consider it as the start of a new chain and will plot the following data in a new figure. A gap in residue index that is less than ``gap`` will be marked as gray areas in the figure, but a gap that is larger than ``gap`` will start a new figure. interactions : list Values to plot. In the same order as ``residue_index``. gap : int, optional, default=200 The number of missing residues in ``residue_index`` that initiate a new figure. The gap between two adjacent index in ``residue_index`` that is smaller than the provided value will be considered as missing residues and will be marked as gray areas in the figure, whereas a gap that is larger than the provided value will start a new figure and plot the following data in that new figure. This can help to make figures more compressed. The default gap is 200. ylabel : str, optional, default=None y axis label. Default is "Interactions". fn : str, optional, default=None Figure name. By default the figure is saved as "Figure_interactions.pdf" as the current working directory. title : str, optional, default=None Figure title. fig_close : bool, optional, default=False Use plt.close() to close the figure. Can be used to save memory if many figures are opened. """ bar_color = "#176BA0" if ylabel is None: ylabel = "Interactions" if fn is None: fn = os.path.join(os.getcwd(), "Figure_interactions.pdf") # check for chain breaks gray_areas = defaultdict(list) # show grey area to indicate missing residues chain_starts = [0] # plot in separate figures if the gap between two adjacent residues is larger than 50 for idx in np.arange(1, len(residue_index)): if residue_index[idx] - residue_index[idx - 1] < 0: chain_starts.append(idx) elif residue_index[idx] - residue_index[idx - 1] > gap: chain_starts.append(idx) elif 1 < residue_index[idx] - residue_index[idx - 1] <= gap: gray_areas[chain_starts[-1]].append([residue_index[idx - 1] + 1, residue_index[idx] - 1]) chain_starts.append(len(residue_index)) # plot for chain_idx in np.arange(len(chain_starts[:-1])): df = interactions[chain_starts[chain_idx]:chain_starts[chain_idx + 1]] resi_selected = residue_index[chain_starts[chain_idx]:chain_starts[chain_idx + 1]] if 0 < len(df) <= 20: fig, ax = plt.subplots(1, 1, figsize=(2.8, 1.5)) ax.xaxis.set_major_locator(MultipleLocator(5)) ax.xaxis.set_minor_locator(MultipleLocator(1)) elif 20 < len(df) <= 50: fig, ax = plt.subplots(1, 1, figsize=(3.2, 1.5)) ax.xaxis.set_major_locator(MultipleLocator(10)) ax.xaxis.set_minor_locator(MultipleLocator(1)) elif 50 < len(df) <= 300: fig, ax = plt.subplots(1, 1, figsize=(3.8, 1.8)) ax.xaxis.set_major_locator(MultipleLocator(50)) ax.xaxis.set_minor_locator(MultipleLocator(10)) elif 300 < len(df) <= 1000: fig, ax = plt.subplots(1, 1, figsize=(4.5, 1.8)) ax.xaxis.set_major_locator(MultipleLocator(100)) ax.xaxis.set_minor_locator(MultipleLocator(10)) elif 1000 < len(df) <= 2000: fig, ax = plt.subplots(1, 1, figsize=(6.0, 1.8)) ax.xaxis.set_major_locator(MultipleLocator(200)) ax.xaxis.set_minor_locator(MultipleLocator(50)) elif len(df) > 2000: fig, ax = plt.subplots(1, 1, figsize=(7.5, 1.8)) ax.xaxis.set_major_locator(MultipleLocator(500)) ax.xaxis.set_minor_locator(MultipleLocator(100)) ax.bar(resi_selected, df, 1.0, linewidth=0, color=bar_color) # plot missing residue area if chain_starts[chain_idx] in gray_areas.keys(): for gray_area in gray_areas[chain_starts[chain_idx]]: ax.axvspan(gray_area[0], gray_area[1], facecolor="#c0c0c0", alpha=0.3) # axis setting ax.set_ylim(0, df.max() * 1.05) ax.set_xlim(resi_selected.min() - 1, resi_selected.max() + 1) ax.set_ylabel(ylabel, fontsize=8, weight="bold") ax.set_xlabel("Residue Index", fontsize=8, weight="bold") for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels(): plt.setp(label, fontsize=8, weight="bold") if title is not None: ax.set_title(title, fontsize=8, weight="bold") plt.tight_layout() if len(chain_starts) == 2: fig.savefig(fn, dpi=300) else: name, ext = os.path.splitext(fn) fig.savefig("{}_{}{}".format(name, chain_idx, ext), dpi=300) if fig_close: plt.close() return
[docs]def plot_binding_site_data(data, fig_fn, ylabel=None, title=None, fig_close=False): """Plot binding site data in a matplotlib violin plot. The provided ``data`` needs to be a pandas.DataFrame object which has "Binding Site {idx}" as its column names and records binding site information by column. Parameters ---------- data : padnas.DataFrame Data to plot. It needs to have "Binding Site {idx}" as its column names and records binding site information by column. fig_fn : str Figure name. ylabel : str, optional, default=None Y label. title : str, optional, default=None Figure title. fig_close : bool, optional, default=False Use plt.close() to close the figure. Can be used to save memory if many figures are opened. """ from itertools import cycle as _cycle def adjacent_values(vals, q1, q3): upper_adjacent_value = q3 + (q3 - q1) * 1.5 upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1]) lower_adjacent_value = q1 - (q3 - q1) * 1.5 lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1) return lower_adjacent_value, upper_adjacent_value if ylabel is None: ylabel = "" if title is None: title = "" color_set = _cycle(plt.get_cmap("tab10").colors) plt.rcParams["font.size"] = 10 plt.rcParams["font.weight"] = "bold" BS_names = [col for col in data.columns] BS_id_set = [int(name.split()[-1]) for name in BS_names] BS_id_set.sort() data_processed = [np.sort(data["Binding Site {}".format(bs_id)].dropna().tolist()) for bs_id in BS_id_set] colors = [next(color_set) for dummy in BS_id_set] fig, ax = plt.subplots(1, 1, figsize=(len(BS_id_set)*0.6, 2.8)) plt.subplots_adjust(bottom=0.20, top=0.83) ax.set_title(title, fontsize=10, weight="bold") parts = ax.violinplot(data_processed, showmeans=False, showmedians=False, showextrema=False) for pc_idx, pc in enumerate(parts['bodies']): pc.set_facecolors(colors[pc_idx]) pc.set_edgecolor('black') pc.set_alpha(1) # deal with the situation in which the columns in data have different lengths. quartile1, medians, quartile3 = np.array([np.percentile(d, [25, 50, 75]) for d in data_processed]).T whiskers = np.array([ adjacent_values(sorted_array, q1, q3) for sorted_array, q1, q3 in zip(data_processed, quartile1, quartile3)]) whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1] inds = np.arange(1, len(medians) + 1) ax.scatter(inds, medians, marker='o', color='white', s=3, zorder=3) ax.vlines(inds, quartile1, quartile3, color='k', linestyle='-', lw=5) ax.vlines(inds, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1) ax.get_xaxis().set_tick_params(direction='out') ax.xaxis.set_ticks_position('bottom') ax.set_xticks(np.arange(1, len(BS_id_set) + 1)) ax.set_xticklabels(BS_id_set, fontsize=10, weight="bold") ax.set_xlim(0.25, len(BS_id_set) + 0.75) ax.set_xlabel('Binding Site', fontsize=10, weight="bold") ax.set_ylabel(ylabel, fontsize=10, weight="bold") plt.tight_layout() fig.savefig(fig_fn, dpi=200) if fig_close: plt.close() return
[docs]def plot_surface_area(surface_area, fig_fn, timeunit=None, fig_close=False): """Plot binding site surface area as a function of time. The provided ``surface_area`` needs to be a pandas.DataFrame object, which has columns named as "Binding Site {idx}" to record binding site surface areas and a column named "Time" to record the timesteps at which the surface area data are taken. Parameters ---------- surface_area : pandas.DataFrame A pandas.DataFrame object which has columns named as "Binding Site {idx}" to record binding site surface areas and a column named "Time" to record the timesteps at which the surface area data fig_fn : str Figure filename. timeunit : {"ns", "us"} or None, optional, default=None Time unit shown in x label. fig_close : bool, optional, default=False Use plt.close() to close the figure. Can be used to save memory if many figures are opened. See also --------- pylipid.func.calculate_surface_area The function that generates surface_area data. """ from itertools import cycle as _cycle color_set = _cycle(plt.get_cmap("tab10").colors) plt.rcParams["font.size"] = 10 plt.rcParams["font.weight"] = "normal" if timeunit is None: timeunit = "" elif timeunit == "ns": timeunit = " (ns)" elif timeunit == "us": timeunit = r" ($\mu$s)" row_set = list(set([ind[:2] for ind in surface_area.index])) row_set.sort() col_set = [col for col in surface_area.columns if col != "Time"] colors = [next(color_set) for dummy in col_set] fig, axes = plt.subplots(len(row_set), len(col_set), figsize=(len(col_set)*2.4, len(row_set)*1.6), sharex=True, sharey=True) plt.subplots_adjust(wspace=0.2, hspace=0.16) if len(col_set) == 1: axes = np.atleast_1d(axes)[:, np.newaxis] else: axes = np.atleast_2d(axes) for row_idx, row in enumerate(row_set): df = surface_area.loc[row] for col_idx, bs_name in enumerate(col_set): axes[row_idx, col_idx].plot(df["Time"], df[bs_name], color=colors[col_idx], label="traj {} prot {}".format(row[0], row[1])) if row_idx == len(row_set)-1: axes[row_idx, col_idx].set_xlabel("Time{}".format(timeunit), fontsize=10) if col_idx == 0: axes[row_idx, col_idx].set_ylabel(r"Area (nm$^2$)", fontsize=10) if row_idx == 0: axes[row_idx, col_idx].set_title(bs_name, fontsize=10) axes[row_idx, col_idx].legend(loc="best", frameon=False) fig.tight_layout() fig.savefig(fig_fn, dpi=200) if fig_close: plt.close() return
class AxisIndex: """Build axes for logo figure.""" def __init__(self, residue_index, logos, interactions, length, gap): self.page_idx = 0 self.length = length self.gap = gap self.residue_index = residue_index self.logos = logos self.interactions = interactions self.axis_start = (residue_index[0] // length) * length self.breaks = defaultdict(list) self.breaks[self.page_idx].append([]) self.gray_areas = defaultdict(list) def fill_missing(self, start_value, end_value): for xloci in np.arange(start_value, end_value + 1): self.breaks[self.page_idx][-1].append((xloci, "A", 0)) self.gray_areas[self.page_idx].append((len(self.breaks[self.page_idx]) - 1, start_value, end_value)) def new_axis(self, pointer): self.breaks[self.page_idx].append([]) self.axis_start = self.residue_index[pointer] self.breaks[self.page_idx][-1].append( (self.residue_index[pointer], self.logos[pointer], self.interactions[pointer])) def new_page(self, pointer): if len(self.breaks[self.page_idx][-1]) < self.length: self.fill_missing(self.axis_start + len(self.breaks[self.page_idx][-1]), self.axis_start + self.length - 1) self.page_idx += 1 self.breaks[self.page_idx].append([]) self.axis_start = (self.residue_index[pointer] // self.length) * self.length if self.axis_start != self.residue_index[pointer]: self.fill_missing(self.axis_start, self.residue_index[pointer] - 1) self.breaks[self.page_idx][-1].append( (self.residue_index[pointer], self.logos[pointer], self.interactions[pointer])) def new_gap(self, pointer): gray_start = self.residue_index[pointer - 1] + 1 for xloci in np.arange(self.residue_index[pointer - 1] + 1, self.residue_index[pointer]): if xloci - self.axis_start < self.length: self.breaks[self.page_idx][-1].append((xloci, "A", 0)) else: self.gray_areas[self.page_idx].append((len(self.breaks[self.page_idx]) - 1, gray_start, xloci - 1)) self.breaks[self.page_idx].append([]) self.breaks[self.page_idx][-1].append((xloci, "A", 0)) self.axis_start = xloci gray_start = xloci self.gray_areas[self.page_idx].append( (len(self.breaks[self.page_idx]) - 1, gray_start, self.residue_index[pointer] - 1)) self.breaks[self.page_idx][-1].append( (self.residue_index[pointer], self.logos[pointer], self.interactions[pointer])) def sort(self): end = False if self.axis_start != self.residue_index[0]: self.fill_missing(self.axis_start, self.residue_index[0] - 1) self.breaks[self.page_idx][-1].append((self.residue_index[0], self.logos[0], self.interactions[0])) pointer = 1 while not end: if self.residue_index[pointer] - self.residue_index[pointer - 1] == 1 and self.residue_index[ pointer] - self.axis_start < self.length: self.breaks[self.page_idx][-1].append( (self.residue_index[pointer], self.logos[pointer], self.interactions[pointer])) pointer += 1 elif self.residue_index[pointer] - self.residue_index[pointer - 1] == 1 and self.residue_index[ pointer] - self.axis_start >= self.length: self.new_axis(pointer) pointer += 1 elif self.residue_index[pointer] - self.residue_index[pointer - 1] < 0: self.new_page(pointer) pointer += 1 elif 1 < self.residue_index[pointer] - self.residue_index[pointer - 1] <= self.gap: self.new_gap(pointer) pointer += 1 elif self.residue_index[pointer] - self.residue_index[pointer - 1] > self.gap: self.new_page(pointer) pointer += 1 if pointer == len(self.residue_index): end = True if len(self.breaks[self.page_idx][-1]) < self.length: self.fill_missing(self.axis_start + len(self.breaks[self.page_idx][-1]), self.axis_start + self.length - 1)