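# NOTE: this import looks unused, but it is required: importing hvplot.pandas
# registers the `.hvplot` accessor on pandas objects (used for plotting below).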
import hvplot.pandas
from math import pi
import numpy as np
import pandas as pd
import panel as pn
import holoviews as hv
from bokeh.palettes import Category10
from bokeh.plotting import figure
from bokeh.transform import cumsum
def get_histogram_df(data, num_bins=25, full_cumulative=True):
    """
    Pre-compute distance histograms per group so they can be drawn as line plots.

    The rows of ``data`` are grouped by ``gene``, ``celltype`` and
    ``neighbor_rank``. For every group, two sets of rows are produced in
    long format and tagged via the ``stat`` column:

    * ``'probability'``: the normalized histogram of ``distance_um`` over
      ``num_bins`` equal-width bins spanning ``[0, max distance in data]``,
      located at the bin centers.
    * ``'probability_cumulative'``: the cumulative probability, either from
      every individual (sorted) distance value or from the histogram bins.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``gene``, ``celltype``, ``neighbor_rank``
        and ``distance_um``.
    num_bins : int
        Number of histogram bins; the same bin edges are used for all groups.
    full_cumulative : bool
        If True, the cumulative curve has one point per observation, padded
        with ``(0, 0)`` at the start and ``(max_distance, 1)`` at the end.
        If False, it is the running sum of the binned probabilities.

    Returns
    -------
    pandas.DataFrame
        Columns ``prob``, ``distance_um``, ``stat``, ``gene``, ``celltype``,
        ``neighbor_rank`` and ``color``. ``color`` is ``'blue'`` for celltype
        ``'naive'``, ``'red'`` for ``'primed'`` and missing for anything else.
        The per-group indices are kept as-is, so index values repeat.
        An empty frame with these columns is returned when there is nothing
        to group.
    """
    group_cols = ['gene', 'celltype', 'neighbor_rank']
    out_cols = ['prob', 'distance_um', 'stat', *group_cols, 'color']

    # Nothing to histogram: return an empty frame instead of letting
    # pd.concat fail on an empty list further down.
    if data.empty:
        return pd.DataFrame(columns=out_cols)

    # The bin edges depend on the whole data set only, so compute them once
    # rather than once per group.
    max_distance = data['distance_um'].max()
    bin_edges = np.linspace(0, max_distance, num_bins + 1)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    frames = []
    for key, dfi in data.groupby(group_cols):
        # normalized histogram of this group's distances
        counts, _ = np.histogram(dfi['distance_um'], bins=bin_edges)
        probs = counts / counts.sum()
        df_hist = pd.DataFrame({"prob": probs, "distance_um": bin_centers})
        df_hist['stat'] = 'probability'

        if full_cumulative:
            # Empirical CDF from all values: the k-th smallest distance has
            # cumulative probability k / n. Pad both ends so the curve
            # starts at (0, 0) and extends to (max_distance, 1).
            n = len(dfi)
            probs_cumulative = np.concatenate(
                [[0.0], np.linspace(1 / n, 1, n), [1.0]]
            )
            dists_sorted = np.sort(dfi['distance_um'].to_numpy())
            dists_table = np.concatenate([[0.0], dists_sorted, [max_distance]])
            df_hist_cum = pd.DataFrame(
                {"prob": probs_cumulative, "distance_um": dists_table}
            )
        else:
            # cumulative probability from the histogram bins only
            df_hist_cum = pd.DataFrame(
                {"prob": np.cumsum(probs), "distance_um": bin_centers}
            )
        df_hist_cum['stat'] = 'probability_cumulative'

        # stack both statistics and label them with the group they belong to
        df_stats_i = pd.concat([df_hist, df_hist_cum])
        for col, value in zip(group_cols, key):
            df_stats_i[col] = value
        frames.append(df_stats_i)

    # groupby can yield no groups even for non-empty data (e.g. all keys missing)
    if not frames:
        return pd.DataFrame(columns=out_cols)

    df_stats = pd.concat(frames)
    # set colors
    df_stats['color'] = df_stats['celltype'].map({'naive': 'blue', 'primed': 'red'})
    return df_stats
def _prob_exactly_n_contacts(df_stats, gene, celltype, distance_cutoff):
    """Return an array whose entry n is the probability of exactly n neighbors closer than the cutoff."""
    # select just the cumulative probabilities of the celltype and gene of interest
    selected = df_stats[
        (df_stats['celltype'] == celltype)
        & (df_stats['gene'] == gene)
        & (df_stats['stat'] == 'probability_cumulative')
    ]
    # every neighbor rank present for this gene/celltype, in ascending order
    ranks = np.sort(selected['neighbor_rank'].unique())

    # Cumulative probability for each rank to be within the cutoff (largest
    # entry strictly below the cutoff).
    # NOTE: this is the probability of *at least* n neighbors within the cutoff.
    # Ranks with no entry below the cutoff would be dropped by the groupby;
    # reindexing puts them back with probability 0 so that the positions
    # below still correspond to n = 1, 2, ...
    prob_atleast_n = (
        selected[selected['distance_um'] < distance_cutoff]
        .groupby('neighbor_rank')['prob']
        .max()
        .reindex(ranks, fill_value=0.0)
    )

    # convert the at-least-n probabilities to exactly-n:
    # prob(0) = 1 - prob(>=1), prob(1) = prob(>=1) - prob(>=2), ...,
    # prob(N) = prob(>=N). Padding with prob(>=0) = 1 and prob(>=N+1) = 0
    # turns all of these into differences of consecutive entries, and also
    # covers the case of no ranks at all (result: prob(0) = 1).
    at_least = np.concatenate([[1.0], prob_atleast_n.to_numpy(dtype=float), [0.0]])
    return at_least[:-1] - at_least[1:]


def get_piechart_num_contacts(df_stats, gene, celltype, distance_cutoff):
    """
    Get a Bokeh Pie Chart of fraction of promoters with 0, 1, 2, ... enhancers within a certain distance.
    Following https://docs.bokeh.org/en/latest/docs/user_guide/topics/pie.html

    Parameters
    ----------
    df_stats : pandas.DataFrame
        Long-format statistics with the columns ``gene``, ``celltype``,
        ``neighbor_rank``, ``stat``, ``prob`` and ``distance_um``; only rows
        with ``stat == 'probability_cumulative'`` are used.
    gene, celltype
        Values of the ``gene`` / ``celltype`` columns to select.
    distance_cutoff : float
        Only entries with ``distance_um`` strictly below this value count as
        being within the cutoff. A cutoff below every entry yields a pie with
        all probability on ``0`` neighbors.

    Returns
    -------
    panel.pane.Bokeh
        Pane wrapping the pie chart; one wedge per number of neighbors.
    """
    prob_exactly_n = _prob_exactly_n_contacts(df_stats, gene, celltype, distance_cutoff)
    df_prob_exactly_n = pd.DataFrame({
        'num_neighbors': np.arange(len(prob_exactly_n)),
        'prob': prob_exactly_n,
    })

    # probabilities to fraction of circle
    df_prob_exactly_n['angle'] = df_prob_exactly_n['prob'] / df_prob_exactly_n['prob'].sum() * 2 * pi

    # Set colors from palette. Category10 is keyed by palette size and does
    # not offer every size (3-10 in the Bokeh palette reference), so take the
    # 10-color palette and cycle through it instead of indexing by the number
    # of wedges, which fails for very few or more than ten wedges.
    palette = Category10[10]
    df_prob_exactly_n['color'] = [
        palette[i % len(palette)] for i in range(len(df_prob_exactly_n))
    ]

    p = figure(height=250, width=300, title=f"{celltype}", toolbar_location=None,
               tools="hover", tooltips="@num_neighbors: @prob", x_range=(-0.5, 1.0))

    # each wedge starts where the previous one ended (cumulative sum of angles)
    p.wedge(x=0, y=1, radius=0.4,
            start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
            line_color="white", fill_color='color', legend_field='num_neighbors',
            source=df_prob_exactly_n)

    # a pie chart needs neither axes nor grid
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    bokeh_pane = pn.pane.Bokeh(p, theme="dark_minimal")
    return bokeh_pane
def get_histogram_plot(df_stats, gene, vline_pos, cumulative=True):
    """
    Build the distance-distribution line plot for one gene.

    Rows of ``df_stats`` are restricted to ``gene`` and to either the
    cumulative (``cumulative=True``) or the plain probability statistic.
    ``prob`` is plotted against ``distance_um``, grouped by ``celltype`` and
    ``neighbor_rank`` and colored via the ``color`` column, with a vertical
    line overlaid at ``vline_pos``. The overlay is returned wrapped in a
    centered HoloViews pane.
    """
    # which statistic to show
    if cumulative:
        wanted_stat = 'probability_cumulative'
    else:
        wanted_stat = 'probability'

    # keep only the rows for the chosen gene and statistic
    keep = (df_stats.gene == gene) & (df_stats.stat == wanted_stat)
    subset = df_stats[keep]

    # line plot of the distribution
    curves = subset.hvplot(
        'distance_um',
        'prob',
        label="Distance distribution by neighbor rank",
        height=500,
        width=700,
        by=['celltype', 'neighbor_rank'],
        color='color',
    )
    # vertical line marking the chosen cutoff
    cutoff_marker = hv.VLine(vline_pos)

    return pn.pane.HoloViews(curves * cutoff_marker, align='center')
# load data
data = pd.read_csv('fig2_revision.csv')

# Sort by fov and then by distance, so that the running count of rows within
# each fov is the neighbor rank (1 = smallest distance in that fov).
# cumcount() is aligned on the row index, so it does not depend on the group
# iteration order matching the row order.
data = data.sort_values(['fov', 'distance_um'])
data['neighbor_rank'] = data.groupby('fov').cumcount() + 1

pn.extension(design='material')

# widgets for interactive plot settings
cumulative = pn.widgets.Checkbox(name='Cumulative Histogram?', value=True)
# sorted unique genes: iterating a set gives an arbitrary order that can differ
# between runs, which would shuffle the dropdown
gene = pn.widgets.Select(name='Gene', options=sorted(data['gene'].dropna().unique()), value='Nanog')
num_bins = pn.widgets.IntInput(name='Num. Bins', value=30, start=10, end=80, step=5)
distance_cutoff = pn.widgets.FloatSlider(name='Distance cutoff [µm]', value=0.25, start=0, end=1.5, step=0.01)

# re-calculate histogram df if num_bins changes
df_stats = pn.bind(get_histogram_df, data, num_bins)

# histogram plot and pie charts for naive, primed
hist_plot = pn.bind(get_histogram_plot, df_stats, gene, distance_cutoff, cumulative)
pie_naive = pn.bind(get_piechart_num_contacts, df_stats, gene, "naive", distance_cutoff)
pie_primed = pn.bind(get_piechart_num_contacts, df_stats, gene, "primed", distance_cutoff)

# layout: options box | histogram | column with the two pie charts
options = pn.WidgetBox("Plot Options", gene, distance_cutoff, cumulative, num_bins, align='center')
pn.Row(
    options,
    hist_plot,
    pn.Column("Fraction of cells with n contacts", pie_naive, pie_primed, align='center'),
    align='center',
).servable()