Source code for ClearMap.Visualization.Matplotlib.PlotUtils

# -*- coding: utf-8 -*-
"""
PlotUtils Module
================

Plotting routines for ClearMap based on matplotlib.

Note
----
    This module is using matplotlib.
"""
__author__ = 'Christoph Kirst <christoph.kirst.ck@gmail.com>'
__license__ = 'GPLv3 - GNU General Pulic License v3 (see LICENSE)'
__copyright__ = 'Copyright © 2020 by Christoph Kirst'
__webpage__ = 'https://idisco.info'
__download__ = 'https://www.github.com/ChristophKirst/ClearMap2'

import math
import numpy as np
import pandas as pd

import scipy.stats as st

from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # analysis:ignore

from ClearMap.Alignment import Annotation as annotation
from ClearMap.Analysis.Statistics.data_frame_operations import sanitize_df, normalise_df_column_names

plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['pdf.use14corefonts'] = True
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'none'
plt.rcParams["axes.axisbelow"] = False


###############################################################################
# ## Density and Countour plots
###############################################################################


[docs] def plot_density(points, plot_points=True, plot_contour=True, plot_contour_lines=10, n_bins=100, color=None, cmap=plt.cm.gray, xlim=None, ylim=None, verbose=False): """Plot point distributions.""" if xlim is None: xlim = (np.min(points[:, 0]), np.max(points[:, 0])) if ylim is None: ylim = (np.min(points[:, 1]), np.max(points[:, 1])) xmin, xmax = xlim ymin, ymax = ylim # kernel density estimates dx = float(xmax - xmin) / n_bins dy = float(ymax - ymin) / n_bins xx, yy = np.mgrid[xmin:xmax:dx, ymin:ymax:dy] positions = np.vstack([xx.ravel(), yy.ravel()]) kernel = st.gaussian_kde(points.T) if verbose: print('plot_density: kernel estimation done.') density = kernel(positions) density = np.reshape(density, xx.shape) if verbose: print('plot_density: density estimation done.') # figure setup ax = plt.gca() ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) # contour plot if plot_contour: cfset = ax.contourf(xx, yy, density, cmap=cmap) if plot_points: plt.scatter(points[:, 0], points[:, 1], c=color, cmap=cmap, s=1) if plot_contour_lines is not None: cset = ax.contour(xx, yy, density, plot_contour_lines, cmap=cmap) return kernel, (xx, yy, density)
[docs] def plot_curve(coordinates, **kwargs): """Plot a curve in 3d. Arguments --------- coordinates : nx3 array. The coordinates of the curve. kwargs Matplotlib parameter. Returns ------- ax : ax 3d axes object. """ ax = plt.gca(projection='3d') x, y, z = coordinates.T ax.plot(x, y, z, **kwargs) return ax
[docs] def subplot_tiling(n, tiling=None): """Finds a good tiling to arrange subplots. Arguments --------- n : int Number of subplots. tiling : None, 'automatic, int or tuple The tiling to use. If None or 'automatic' calculate automatically. If number use this for the number of subplots along the horizontal axis. If tuple, (nx,ny) nx and ny can be numbes or None to indicate the number of sub-plots in each axis. Iif one of them is None, it will be determined automatically to fit the total number of plots. Returns ------- tiling : tuple of int The subplot tiling. """ if tiling is None: tiling = 'automatic' if tiling == "automatic": nx = math.floor(math.sqrt(n)) ny = int(math.ceil(n / nx)) nx = int(nx) else: if not isinstance(tiling, tuple): tiling = (tiling, None) if tiling[0] is None: ny = tiling[1] if ny is None: return subplot_tiling(n) nx = int(math.ceil(n / ny)) if tiling[1] is None: nx = tiling[0] if nx is None: return subplot_tiling(n) ny = int(math.ceil(n / nx)) return nx, ny
[docs] def handle_fig_fate(fig, show, save_path): if save_path: plt.savefig(save_path) else: if show: plt.show() else: return fig
[docs] def plot_sample_stats_histogram(stats_df, aba_df, sort_by_order=False, split_criterion='hemisphere', metric_name='density', value_cutoff=0.05, fold_threshold=5, fold_regions=False, save_path=None, show=True): stats_df = sanitize_df(stats_df) is_gp_stats = is_gp_stats_df(stats_df) if is_gp_stats: group_names = [k.replace('mean_', '') for k in stats_df.columns if k.startswith('mean_')] stats_df = gp_stats_df_to_counts(stats_df) # Convert to same format for simplicity stats_df = normalise_df_column_names(stats_df) if fold_regions: stats_df = fold_sample_stats_dataframe(stats_df, aba_df, fold_threshold) stats_df['color'] = get_structure_colors(stats_df) # Compute metric, sort and filter after fold if metric_name == 'density': stats_df['density'] = stats_df['cell_counts'] / stats_df['structure_volume'] stats_df = stats_df.sort_values(metric_name, ascending=False) high_values = np.unique(stats_df[stats_df[metric_name] >= value_cutoff]['structure_id']) stats_df = stats_df[stats_df['structure_id'].isin(high_values)] fig, axes = plt.subplots(figsize=(10, 40), facecolor='#eaeaf2', ncols=2, sharey=True, gridspec_kw={'wspace': 0.2}) plt.xlabel(f'Cell {metric_name}') if split_criterion == 'hemisphere': titles = ['Left', 'Right'] elif split_criterion == 'group_id': titles = group_names else: raise NotImplementedError(f'Split criterion {split_criterion} is not recognised yet, ' f'supported values are "hemisphere" and "group_id"') for i, split_val in enumerate(np.sort(np.unique(stats_df[split_criterion]))): # Split and plot sub_df = stats_df[stats_df[split_criterion] == split_val] plot_sub_df(sub_df, i, axes, titles, metric_name, sort_by_order) axes[0].invert_xaxis() # FIXME: check that correct axis plt.subplots_adjust(wspace=0, top=0.85, bottom=0.1, left=0.18, right=0.95) return handle_fig_fate(fig, show, save_path)
[docs] def is_gp_stats_df(df): return 'p_value' in df.columns
[docs] def gp_stats_df_to_counts(df): """ stack the df by group name The only counts are the average for the group A new column identifies the group. It is aimed to make it interchangeable with the DF of an individual sample Parameters ---------- df pd.DataFrame Returns pd.DataFrame ------- """ group_names = [k.replace('mean_', '') for k in df.columns if k.startswith('mean_')] # gp1_name, gp2_name = group_names out = pd.DataFrame({ 'structure_id': np.tile(df['id'].values, 2), 'structure_name': np.tile(df['name'].values, 2), 'structure_volume': np.tile(df['volume'].values, 2), 'hemisphere': np.tile(df['hemisphere'].values, 2), 'group_id': np.hstack([np.repeat(group_names[i], df.shape[0]) for i in range(len(group_names))]), 'cell_counts': np.hstack([df[df[f'mean_{group_names[i]}']][f'mean_{group_names[i]}'].values for i in range(len(group_names))]) }) return out
[docs] def plot_sub_df(sub_df, i, axes, titles, criterion_name, sort_by_order=False, struct_name_str='Structure name', struct_acronym_str='Acronym', struct_color_str='color', struct_order_str='Structure order'): if sort_by_order: sub_df = sub_df.sort_values(struct_order_str, ascending=False) ax = axes[i] ax.barh(sub_df[struct_name_str], sub_df[criterion_name], align='center', color=sub_df[struct_color_str], zorder=10) ax.set_title(titles[i], fontsize=18, pad=15) ax.tick_params(left=False, right=False) ax.tick_params(axis='y', labelsize=15, zorder=0) if i != 0: ax.set(yticklabels=sub_df[struct_acronym_str]) else: ax.spines['left'].set_position(('axes', 1)) # ax.spines['bottom'].set_position(('axes', 1)) ax.tick_params(axis='y', direction='in') # Eliminate upper and right axes ax.spines['top'].set_color('none') ax.spines['right'].set_color('none')
[docs] def get_structure_colors(stats_df, id_col_name='Structure ID'): return [annotation.find(_id, key='id')['rgb'] for _id in stats_df[id_col_name]]
[docs] def fold_sample_stats_dataframe(df, aba_df, fold_threshold): df['fold_parent'] = [get_parent_id(aba_df, _id, fold_threshold) for _id in df['structure_id']] # TODO: extract df = df.sort_values('structure_name') output = pd.DataFrame() for hem_id in np.sort(np.unique(df['hemisphere'])): sub_df = df[df['hemisphere'] == hem_id] grouped = sub_df.groupby('fold_parent') half_df = pd.DataFrame() half_df['cell_counts'] = grouped['cell_counts'].sum() half_df['structure_volume'] = grouped['structure_volume'].sum() if 'average_cell_size' in sub_df.columns: half_df['average_cell_size'] = grouped['average_cell_size'].mean() half_df['structure_id'] = grouped['fold_parent'].first() ids = half_df['structure_id'].values half_df['structure_order'] = get_prop_from_aba_df(aba_df, ids, 'graph_order') half_df['structure_name'] = get_prop_from_aba_df(aba_df, ids, 'name') half_df['acronym'] = get_prop_from_aba_df(aba_df, ids, 'acronym') half_df['hemisphere'] = hem_id output = pd.concat((output, half_df)) # , ignore_index=True) return output
[docs] def get_prop_from_aba_df(aba_df, ids, prop_name): return [aba_df[aba_df['id'] == _id][prop_name].values[0] for _id in ids]
[docs] def get_parent_structure(df, _id, level): current_structure = df[df['id'] == _id] current_level = current_structure['custom_level'].values[0] if current_level <= level: return current_structure else: parent_id = current_structure['parent_structure_id'].values[0] parent_structure = df[df['id'] == parent_id] try: parent_level = parent_structure['custom_level'].values[0] except IndexError: raise ValueError(f'Failed to get parent level for {parent_structure}, with ID: {_id} and level {level}') if parent_level <= level: return parent_structure else: return get_parent_structure(df, parent_id, level)
[docs] def get_parent_id(df, _id, level): return get_parent_structure(df, _id, level)['id'].values[0]
[docs] def get_parent_name(df, _id, level): return get_parent_structure(df, _id, level)['name'].values[0]
[docs] def plot_volcano(df, group_names, p_cutoff=0.05, show=False, save_path=''): mean_0 = df[f'mean_{group_names[0]}'].values mean_1 = df[f'mean_{group_names[1]}'].values ratio = np.log2(mean_1 / mean_0) p_values = df['p_value'].values ids = df['id'].values colors = np.array([annotation.find(_id, key='id')['rgb'] for _id in ids]) for i, col in enumerate(colors): hsv = rgb_to_hsv(col) if p_values[i] > p_cutoff: hsv[1] *= 0.3 # desaturate non significant p values colors[i] = hsv_to_rgb(hsv) # colors[p_values > p_cutoff] = (0.8, 0.8, 0.8) fig = plt.figure() plt.scatter(ratio, -np.log10(p_values), marker='o', linestyle='None', color=colors, edgecolors=(0.9, 0.9, 0.9)) plt.xlim((-5, 5)) plt.hlines(-np.log10(p_cutoff), -5, 5, linestyles='dashed', color='grey') plt.xlabel('log2(fold change)') plt.ylabel('- log10(p value)') return handle_fig_fate(fig, show, save_path)